diff mbox series

[ovs-dev,v2,1/2] dpif-netdev: Expand the meter capacity using cmap

Message ID 1584254601-7321-1-git-send-email-xiangxia.m.yue@gmail.com
State Changes Requested
Headers show
Series [ovs-dev,v2,1/2] dpif-netdev: Expand the meter capacity using cmap | expand

Commit Message

Tonghao Zhang March 15, 2020, 6:43 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

For now, ovs-vswitchd use the array of the dp_meter struct
to store meter's data, and at most, there are only 65536
(defined by MAX_METERS) meters that can be used. But in some
case, for example, in the edge gateway, we should use 200,000+,
at least, meters for IP address bandwidth limitation.
Every one IP address will use two meters for its rx and tx
path[1]. In other way, ovs-vswitchd should support meter-offload
(rte_mtr_xxx api introduced by dpdk.), but there are more than
65536 meters in the hardware, such as Mellanox ConnectX-6.

This patch use cmap to manage the meter, instead of the array.

* Insertion performance, ovs-ofctl add-meter 1000+ meters,
  the cmap takes abount 4000ms, as same as previous implementation.
* Lookup performance in datapath, we add 1000+ meter which rate is
  10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
  packets.), and a flow which only forwarding the packets from p0
  to p1, with meter action[2]. On other server, the pktgen-dpdk
  will generate 64B packets to p0.
  The forwarding performance is 4,814,400 pps. Without this path,
  4,935,584 pps. There are about 1% performance loss. For addressing
  this issue, next patch add a meter cache.

[1].
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

[2].
$ in_port=p0 action=meter:100,output:p1

Cc: Ben Pfaff <blp@ovn.org>
Cc: Jarno Rajahalme <jarno@ovn.org>
Cc: Ilya Maximets <i.maximets@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 lib/dpif-netdev.c | 199 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 130 insertions(+), 69 deletions(-)

Comments

Yanqin Wei March 16, 2020, 3:07 a.m. UTC | #1
Hi Xiangxia,

The meter id is allocated by id_pool,  which can always return the lowest available id.
There is a light scalable data struct which supports direct address lookup. It can achieve several times performance improvement than cmap_find. And it has not copy memory when expanding.
https://patchwork.ozlabs.org/patch/1253447/

I suggest using this light data struct for >65535 meter instance. Would you like to take a look at it?

Best Regards,
Wei Yanqin

> -----Original Message-----
> From: dev <ovs-dev-bounces@openvswitch.org> On Behalf Of
> xiangxia.m.yue@gmail.com
> Sent: Sunday, March 15, 2020 2:43 PM
> To: dev@openvswitch.org
> Cc: Ilya Maximets <i.maximets@ovn.org>
> Subject: [ovs-dev] [PATCH v2 1/2] dpif-netdev: Expand the meter capacity
> using cmap
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> For now, ovs-vswitchd use the array of the dp_meter struct to store meter's
> data, and at most, there are only 65536 (defined by MAX_METERS) meters
> that can be used. But in some case, for example, in the edge gateway, we
> should use 200,000+, at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx path[1]. In other way,
> ovs-vswitchd should support meter-offload (rte_mtr_xxx api introduced by
> dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
>
> This patch use cmap to manage the meter, instead of the array.
>
> * Insertion performance, ovs-ofctl add-meter 1000+ meters,
>   the cmap takes abount 4000ms, as same as previous implementation.
> * Lookup performance in datapath, we add 1000+ meter which rate is
>   10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
>   packets.), and a flow which only forwarding the packets from p0
>   to p1, with meter action[2]. On other server, the pktgen-dpdk
>   will generate 64B packets to p0.
>   The forwarding performance is 4,814,400 pps. Without this path,
>   4,935,584 pps. There are about 1% performance loss. For addressing
>   this issue, next patch add a meter cache.
>
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
>
> [2].
> $ in_port=p0 action=meter:100,output:p1
>
> Cc: Ben Pfaff <blp@ovn.org>
> Cc: Jarno Rajahalme <jarno@ovn.org>
> Cc: Ilya Maximets <i.maximets@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
>  lib/dpif-netdev.c | 199 +++++++++++++++++++++++++++++++++++-----------------
> --
>  1 file changed, 130 insertions(+), 69 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d393aab..5474d52
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -98,9 +98,11 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t,
> recirc_depth, 0)
>
>  /* Configuration parameters. */
>  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table.
> */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> +
> +/* Maximum number of meters in the table. */ #define METER_ENTRY_MAX
> (1
> +<< 19)
> +/* Maximum number of bands / meter. */
> +#define METER_BAND_MAX (8)
>
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -280,6 +282,9 @@ struct dp_meter_band {  };
>
>  struct dp_meter {
> +    struct cmap_node node;
> +    struct ovs_mutex lock;
> +    uint32_t id;
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> @@ -289,6 +294,12 @@ struct dp_meter {
>      struct dp_meter_band bands[];
>  };
>
> +struct dp_netdev_meter {
> +    struct cmap table OVS_GUARDED;
> +    struct ovs_mutex lock;  /* Used for meter table. */
> +    uint32_t hash_basis;
> +};
> +
>  struct pmd_auto_lb {
>      bool auto_lb_requested;     /* Auto load balancing requested by user. */
>      bool is_enabled;            /* Current status of Auto load balancing. */
> @@ -329,8 +340,7 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct dp_netdev_meter *meter;
>
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -378,19 +388,6 @@ struct dp_netdev {
>      struct pmd_auto_lb pmd_alb;
>  };
>
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct
> dp_netdev *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1523,6 +1520,84 @@ choose_port(struct dp_netdev *dp, const char
> *name)
>      return ODPP_NONE;
>  }
>
> +static uint32_t
> +dp_meter_hash(uint32_t meter_id, uint32_t basis) {
> +    return hash_bytes32(&meter_id, sizeof meter_id, basis); }
> +
> +static void
> +dp_netdev_meter_init(struct dp_netdev *dp) {
> +    struct dp_netdev_meter *dp_meter;
> +
> +    dp_meter = xmalloc(sizeof *dp_meter);
> +
> +    cmap_init(&dp_meter->table);
> +    ovs_mutex_init(&dp_meter->lock);
> +    dp_meter->hash_basis = random_uint32();
> +
> +    dp->meter = dp_meter;
> +}
> +
> +static void
> +dp_netdev_meter_destroy(struct dp_netdev *dp) {
> +    struct dp_netdev_meter *dp_meter = dp->meter;
> +    struct dp_meter *meter;
> +
> +    ovs_mutex_lock(&dp_meter->lock);
> +    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
> +            cmap_remove(&dp_meter->table, &meter->node,
> +                        dp_meter_hash(meter->id,
> + dp_meter->hash_basis));
> +
> +            ovsrcu_postpone(free, meter);
> +    }
> +
> +    cmap_destroy(&dp_meter->table);
> +    ovs_mutex_unlock(&dp_meter->lock);
> +
> +    ovs_mutex_destroy(&dp_meter->lock);
> +    free(dp_meter);
> +}
> +
> +static struct dp_meter*
> +dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id) {
> +    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);
> +    struct dp_meter *meter;
> +
> +    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
> +        if (meter->id == meter_id) {
> +            return meter;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t
> meter_id)
> +                     OVS_REQUIRES(dp_meter->lock) {
> +    struct dp_meter *meter;
> +
> +    meter = dp_meter_lookup(dp_meter, meter_id);
> +    if (meter) {
> +            cmap_remove(&dp_meter->table, &meter->node,
> +                        dp_meter_hash(meter_id, dp_meter->hash_basis));
> +            ovsrcu_postpone(free, meter);
> +    }
> +}
> +
> +static void
> +dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter
> *meter)
> +                OVS_REQUIRES(dp_meter->lock) {
> +    cmap_insert(&dp_meter->table, &meter->node,
> +                dp_meter_hash(meter->id, dp_meter->hash_basis)); }
> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp) @@ -1556,9 +1631,8 @@
> create_dp_netdev(const char *name, const struct dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    /* Init meter resource. */
> +    dp_netdev_meter_init(dp);
>
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1647,16 +1721,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev
> *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
>   * through the 'dp_netdevs' shash while freeing 'dp'. */  static void @@ -
> 1694,16 +1758,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_destroy(dp);
>
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name)); @@ -5708,10 +5763,10 @@ static
> void  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
>                                 struct ofputil_meter_features *features)  {
> -    features->max_meters = MAX_METERS;
> +    features->max_meters = METER_ENTRY_MAX;
>      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
>      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> -    features->max_bands = MAX_BANDS;
> +    features->max_bands = METER_BAND_MAX;
>      features->max_color = 0;
>  }
>
> @@ -5732,14 +5787,13 @@ dp_netdev_run_meter(struct dp_netdev *dp,
> struct dp_packet_batch *packets_,
>      uint32_t exceeded_rate[NETDEV_MAX_BURST];
>      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
>
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return;
>      }
>
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(dp->meter, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>
>      /* Initialize as negative values. */ @@ -5747,6 +5801,7 @@
> dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch
> *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>
> +    ovs_mutex_lock(&meter->lock);
>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>
> @@ -5860,8 +5915,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct
> dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>
>  /* Meter set/get/del processing is still single-threaded. */ @@ -5870,11
> +5925,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id
> meter_id,
>                        struct ofputil_meter_config *config)  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev_meter *dp_meter = dp->meter;
>      uint32_t mid = meter_id.uint32;
>      struct dp_meter *meter;
>      int i;
>
> -    if (mid >= MAX_METERS) {
> +    if (mid >= METER_ENTRY_MAX) {
>          return EFBIG; /* Meter_id out of range. */
>      }
>
> @@ -5882,7 +5938,7 @@ dpif_netdev_meter_set(struct dpif *dpif,
> ofproto_meter_id meter_id,
>          return EBADF; /* Unsupported flags set */
>      }
>
> -    if (config->n_bands > MAX_BANDS) {
> +    if (config->n_bands > METER_BAND_MAX) {
>          return EINVAL;
>      }
>
> @@ -5903,6 +5959,8 @@ dpif_netdev_meter_set(struct dpif *dpif,
> ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) { @@ -5928,10 +5986,12 @@
> dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          }
>      }
>
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&dp_meter->lock);
> +
> +    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any */
> +    dp_meter_attach(dp_meter, meter);
> +
> +    ovs_mutex_unlock(&dp_meter->lock);
>
>      return 0;
>  }
> @@ -5941,22 +6001,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return EFBIG;
>      }
>
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(dp->meter, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
> -        int i = 0;
> +        int i;
> +
> +        ovs_mutex_lock(&meter->lock);
>
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count; @@ -5966,12 +6027,11 @@
> dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
>
> +        ovs_mutex_unlock(&meter->lock);
>          stats->n_bands = i;
>      }
>
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>
>  static int
> @@ -5980,15 +6040,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev_meter *dp_meter = dp->meter;
>      int error;
>
>      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&dp_meter->lock);
> +        dp_meter_detach_free(dp_meter, meter_id);
> +        ovs_mutex_unlock(&dp_meter->lock);
>      }
>      return error;
>  }
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Tonghao Zhang March 16, 2020, 4:06 a.m. UTC | #2
On Mon, Mar 16, 2020 at 11:07 AM Yanqin Wei <Yanqin.Wei@arm.com> wrote:
>
> Hi Xiangxia,
>
> The meter id is allocated by id_pool,  which can always return the lowest available id.
Yes, I added 70000+ meters, and it works fine.
> There is a light scalable data struct which supports direct address lookup. It can achieve several times performance improvement than cmap_find. And it has not copy memory when expanding.
> https://patchwork.ozlabs.org/patch/1253447/
>
> I suggest using this light data struct for >65535 meter instance. Would you like to take a look at it?
Yes, and I also wait Ilya to review, and offer a suggestion.
> Best Regards,
> Wei Yanqin
>
> > -----Original Message-----
> > From: dev <ovs-dev-bounces@openvswitch.org> On Behalf Of
> > xiangxia.m.yue@gmail.com
> > Sent: Sunday, March 15, 2020 2:43 PM
> > To: dev@openvswitch.org
> > Cc: Ilya Maximets <i.maximets@ovn.org>
> > Subject: [ovs-dev] [PATCH v2 1/2] dpif-netdev: Expand the meter capacity
> > using cmap
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > For now, ovs-vswitchd use the array of the dp_meter struct to store meter's
> > data, and at most, there are only 65536 (defined by MAX_METERS) meters
> > that can be used. But in some case, for example, in the edge gateway, we
> > should use 200,000+, at least, meters for IP address bandwidth limitation.
> > Every one IP address will use two meters for its rx and tx path[1]. In other way,
> > ovs-vswitchd should support meter-offload (rte_mtr_xxx api introduced by
> > dpdk.), but there are more than
> > 65536 meters in the hardware, such as Mellanox ConnectX-6.
> >
> > This patch use cmap to manage the meter, instead of the array.
> >
> > * Insertion performance, ovs-ofctl add-meter 1000+ meters,
> >   the cmap takes abount 4000ms, as same as previous implementation.
> > * Lookup performance in datapath, we add 1000+ meter which rate is
> >   10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
> >   packets.), and a flow which only forwarding the packets from p0
> >   to p1, with meter action[2]. On other server, the pktgen-dpdk
> >   will generate 64B packets to p0.
> >   The forwarding performance is 4,814,400 pps. Without this path,
> >   4,935,584 pps. There are about 1% performance loss. For addressing
> >   this issue, next patch add a meter cache.
> >
> > [1].
> > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
> >
> > [2].
> > $ in_port=p0 action=meter:100,output:p1
> >
> > Cc: Ben Pfaff <blp@ovn.org>
> > Cc: Jarno Rajahalme <jarno@ovn.org>
> > Cc: Ilya Maximets <i.maximets@ovn.org>
> > Cc: Andy Zhou <azhou@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > ---
> >  lib/dpif-netdev.c | 199 +++++++++++++++++++++++++++++++++++-----------------
> > --
> >  1 file changed, 130 insertions(+), 69 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d393aab..5474d52
> > 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -98,9 +98,11 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t,
> > recirc_depth, 0)
> >
> >  /* Configuration parameters. */
> >  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table.
> > */
> > -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> > -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> > -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> > +
> > +/* Maximum number of meters in the table. */ #define METER_ENTRY_MAX
> > (1
> > +<< 19)
> > +/* Maximum number of bands / meter. */
> > +#define METER_BAND_MAX (8)
> >
> >  COVERAGE_DEFINE(datapath_drop_meter);
> >  COVERAGE_DEFINE(datapath_drop_upcall_error);
> > @@ -280,6 +282,9 @@ struct dp_meter_band {  };
> >
> >  struct dp_meter {
> > +    struct cmap_node node;
> > +    struct ovs_mutex lock;
> > +    uint32_t id;
> >      uint16_t flags;
> >      uint16_t n_bands;
> >      uint32_t max_delta_t;
> > @@ -289,6 +294,12 @@ struct dp_meter {
> >      struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_netdev_meter {
> > +    struct cmap table OVS_GUARDED;
> > +    struct ovs_mutex lock;  /* Used for meter table. */
> > +    uint32_t hash_basis;
> > +};
> > +
> >  struct pmd_auto_lb {
> >      bool auto_lb_requested;     /* Auto load balancing requested by user. */
> >      bool is_enabled;            /* Current status of Auto load balancing. */
> > @@ -329,8 +340,7 @@ struct dp_netdev {
> >      atomic_uint32_t tx_flush_interval;
> >
> >      /* Meters. */
> > -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> > -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> > +    struct dp_netdev_meter *meter;
> >
> >      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
> >      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> > @@ -378,19 +388,6 @@ struct dp_netdev {
> >      struct pmd_auto_lb pmd_alb;
> >  };
> >
> > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -
> >  static struct dp_netdev_port *dp_netdev_lookup_port(const struct
> > dp_netdev *dp,
> >                                                      odp_port_t)
> >      OVS_REQUIRES(dp->port_mutex);
> > @@ -1523,6 +1520,84 @@ choose_port(struct dp_netdev *dp, const char
> > *name)
> >      return ODPP_NONE;
> >  }
> >
> > +static uint32_t
> > +dp_meter_hash(uint32_t meter_id, uint32_t basis) {
> > +    return hash_bytes32(&meter_id, sizeof meter_id, basis); }
> > +
> > +static void
> > +dp_netdev_meter_init(struct dp_netdev *dp) {
> > +    struct dp_netdev_meter *dp_meter;
> > +
> > +    dp_meter = xmalloc(sizeof *dp_meter);
> > +
> > +    cmap_init(&dp_meter->table);
> > +    ovs_mutex_init(&dp_meter->lock);
> > +    dp_meter->hash_basis = random_uint32();
> > +
> > +    dp->meter = dp_meter;
> > +}
> > +
> > +static void
> > +dp_netdev_meter_destroy(struct dp_netdev *dp) {
> > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > +    struct dp_meter *meter;
> > +
> > +    ovs_mutex_lock(&dp_meter->lock);
> > +    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
> > +            cmap_remove(&dp_meter->table, &meter->node,
> > +                        dp_meter_hash(meter->id,
> > + dp_meter->hash_basis));
> > +
> > +            ovsrcu_postpone(free, meter);
> > +    }
> > +
> > +    cmap_destroy(&dp_meter->table);
> > +    ovs_mutex_unlock(&dp_meter->lock);
> > +
> > +    ovs_mutex_destroy(&dp_meter->lock);
> > +    free(dp_meter);
> > +}
> > +
> > +static struct dp_meter*
> > +dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id) {
> > +    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);
> > +    struct dp_meter *meter;
> > +
> > +    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
> > +        if (meter->id == meter_id) {
> > +            return meter;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t
> > meter_id)
> > +                     OVS_REQUIRES(dp_meter->lock) {
> > +    struct dp_meter *meter;
> > +
> > +    meter = dp_meter_lookup(dp_meter, meter_id);
> > +    if (meter) {
> > +            cmap_remove(&dp_meter->table, &meter->node,
> > +                        dp_meter_hash(meter_id, dp_meter->hash_basis));
> > +            ovsrcu_postpone(free, meter);
> > +    }
> > +}
> > +
> > +static void
> > +dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter
> > *meter)
> > +                OVS_REQUIRES(dp_meter->lock) {
> > +    cmap_insert(&dp_meter->table, &meter->node,
> > +                dp_meter_hash(meter->id, dp_meter->hash_basis)); }
> > +
> >  static int
> >  create_dp_netdev(const char *name, const struct dpif_class *class,
> >                   struct dp_netdev **dpp) @@ -1556,9 +1631,8 @@
> > create_dp_netdev(const char *name, const struct dpif_class *class,
> >      dp->reconfigure_seq = seq_create();
> >      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> >
> > -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> > -    }
> > +    /* Init meter resource. */
> > +    dp_netdev_meter_init(dp);
> >
> >      /* Disable upcalls by default. */
> >      dp_netdev_disable_upcall(dp);
> > @@ -1647,16 +1721,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev
> > *dp)
> >      fat_rwlock_destroy(&dp->upcall_rwlock);
> >  }
> >
> > -static void
> > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    if (dp->meters[meter_id]) {
> > -        free(dp->meters[meter_id]);
> > -        dp->meters[meter_id] = NULL;
> > -    }
> > -}
> > -
> >  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
> >   * through the 'dp_netdevs' shash while freeing 'dp'. */  static void @@ -
> > 1694,16 +1758,7 @@ dp_netdev_free(struct dp_netdev *dp)
> >      /* Upcalls must be disabled at this point */
> >      dp_netdev_destroy_upcall_lock(dp);
> >
> > -    int i;
> > -
> > -    for (i = 0; i < MAX_METERS; ++i) {
> > -        meter_lock(dp, i);
> > -        dp_delete_meter(dp, i);
> > -        meter_unlock(dp, i);
> > -    }
> > -    for (i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_destroy(&dp->meter_locks[i]);
> > -    }
> > +    dp_netdev_meter_destroy(dp);
> >
> >      free(dp->pmd_cmask);
> >      free(CONST_CAST(char *, dp->name)); @@ -5708,10 +5763,10 @@ static
> > void  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
> >                                 struct ofputil_meter_features *features)  {
> > -    features->max_meters = MAX_METERS;
> > +    features->max_meters = METER_ENTRY_MAX;
> >      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
> >      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> > -    features->max_bands = MAX_BANDS;
> > +    features->max_bands = METER_BAND_MAX;
> >      features->max_color = 0;
> >  }
> >
> > @@ -5732,14 +5787,13 @@ dp_netdev_run_meter(struct dp_netdev *dp,
> > struct dp_packet_batch *packets_,
> >      uint32_t exceeded_rate[NETDEV_MAX_BURST];
> >      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(dp->meter, meter_id);
> >      if (!meter) {
> > -        goto out;
> > +        return;
> >      }
> >
> >      /* Initialize as negative values. */ @@ -5747,6 +5801,7 @@
> > dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch
> > *packets_,
> >      /* Initialize as zeroes. */
> >      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
> >
> > +    ovs_mutex_lock(&meter->lock);
> >      /* All packets will hit the meter at the same time. */
> >      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
> >
> > @@ -5860,8 +5915,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct
> > dp_packet_batch *packets_,
> >              dp_packet_batch_refill(packets_, packet, j);
> >          }
> >      }
> > - out:
> > -    meter_unlock(dp, meter_id);
> > +
> > +    ovs_mutex_unlock(&meter->lock);
> >  }
> >
> >  /* Meter set/get/del processing is still single-threaded. */ @@ -5870,11
> > +5925,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id
> > meter_id,
> >                        struct ofputil_meter_config *config)  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_netdev_meter *dp_meter = dp->meter;
> >      uint32_t mid = meter_id.uint32;
> >      struct dp_meter *meter;
> >      int i;
> >
> > -    if (mid >= MAX_METERS) {
> > +    if (mid >= METER_ENTRY_MAX) {
> >          return EFBIG; /* Meter_id out of range. */
> >      }
> >
> > @@ -5882,7 +5938,7 @@ dpif_netdev_meter_set(struct dpif *dpif,
> > ofproto_meter_id meter_id,
> >          return EBADF; /* Unsupported flags set */
> >      }
> >
> > -    if (config->n_bands > MAX_BANDS) {
> > +    if (config->n_bands > METER_BAND_MAX) {
> >          return EINVAL;
> >      }
> >
> > @@ -5903,6 +5959,8 @@ dpif_netdev_meter_set(struct dpif *dpif,
> > ofproto_meter_id meter_id,
> >      meter->n_bands = config->n_bands;
> >      meter->max_delta_t = 0;
> >      meter->used = time_usec();
> > +    meter->id = mid;
> > +    ovs_mutex_init(&meter->lock);
> >
> >      /* set up bands */
> >      for (i = 0; i < config->n_bands; ++i) { @@ -5928,10 +5986,12 @@
> > dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >          }
> >      }
> >
> > -    meter_lock(dp, mid);
> > -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> > -    dp->meters[mid] = meter;
> > -    meter_unlock(dp, mid);
> > +    ovs_mutex_lock(&dp_meter->lock);
> > +
> > +    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any */
> > +    dp_meter_attach(dp_meter, meter);
> > +
> > +    ovs_mutex_unlock(&dp_meter->lock);
> >
> >      return 0;
> >  }
> > @@ -5941,22 +6001,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >                        ofproto_meter_id meter_id_,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)  {
> > -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_netdev *dp = get_dp_netdev(dpif);
> >      uint32_t meter_id = meter_id_.uint32;
> > -    int retval = 0;
> > +    const struct dp_meter *meter;
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return EFBIG;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    const struct dp_meter *meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(dp->meter, meter_id);
> >      if (!meter) {
> > -        retval = ENOENT;
> > -        goto done;
> > +        return ENOENT;
> >      }
> > +
> >      if (stats) {
> > -        int i = 0;
> > +        int i;
> > +
> > +        ovs_mutex_lock(&meter->lock);
> >
> >          stats->packet_in_count = meter->packet_count;
> >          stats->byte_in_count = meter->byte_count; @@ -5966,12 +6027,11 @@
> > dpif_netdev_meter_get(const struct dpif *dpif,
> >              stats->bands[i].byte_count = meter->bands[i].byte_count;
> >          }
> >
> > +        ovs_mutex_unlock(&meter->lock);
> >          stats->n_bands = i;
> >      }
> >
> > -done:
> > -    meter_unlock(dp, meter_id);
> > -    return retval;
> > +    return 0;
> >  }
> >
> >  static int
> > @@ -5980,15 +6040,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_netdev_meter *dp_meter = dp->meter;
> >      int error;
> >
> >      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
> >      if (!error) {
> >          uint32_t meter_id = meter_id_.uint32;
> >
> > -        meter_lock(dp, meter_id);
> > -        dp_delete_meter(dp, meter_id);
> > -        meter_unlock(dp, meter_id);
> > +        ovs_mutex_lock(&dp_meter->lock);
> > +        dp_meter_detach_free(dp_meter, meter_id);
> > +        ovs_mutex_unlock(&dp_meter->lock);
> >      }
> >      return error;
> >  }
> > --
> > 1.8.3.1
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Tonghao Zhang March 24, 2020, 1:25 a.m. UTC | #3
On Mon, Mar 16, 2020 at 12:06 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Mon, Mar 16, 2020 at 11:07 AM Yanqin Wei <Yanqin.Wei@arm.com> wrote:
> >
> > Hi Xiangxia,
> >
> > The meter id is allocated by id_pool,  which can always return the lowest available id.
> Yes, I added 70000+ meters, and it works fine.
> > There is a light scalable data struct which supports direct address lookup. It can achieve several times performance improvement than cmap_find. And it has not copy memory when expanding.
> > https://patchwork.ozlabs.org/patch/1253447/
> >
> > I suggest using this light data struct for >65535 meter instance. Would you like to take a look at it?
> Yes, and I also wait Ilya to review, and offer a suggestion.
Hi maintainers,
Will you have a plan to help me to review those patches, thanks.

http://patchwork.ozlabs.org/patch/1254968/
http://patchwork.ozlabs.org/patch/1254969/
http://patchwork.ozlabs.org/patch/1255408/

> > Best Regards,
> > Wei Yanqin
> >
> > > -----Original Message-----
> > > From: dev <ovs-dev-bounces@openvswitch.org> On Behalf Of
> > > xiangxia.m.yue@gmail.com
> > > Sent: Sunday, March 15, 2020 2:43 PM
> > > To: dev@openvswitch.org
> > > Cc: Ilya Maximets <i.maximets@ovn.org>
> > > Subject: [ovs-dev] [PATCH v2 1/2] dpif-netdev: Expand the meter capacity
> > > using cmap
> > >
> > > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >
> > > For now, ovs-vswitchd use the array of the dp_meter struct to store meter's
> > > data, and at most, there are only 65536 (defined by MAX_METERS) meters
> > > that can be used. But in some case, for example, in the edge gateway, we
> > > should use 200,000+, at least, meters for IP address bandwidth limitation.
> > > Every one IP address will use two meters for its rx and tx path[1]. In other way,
> > > ovs-vswitchd should support meter-offload (rte_mtr_xxx api introduced by
> > > dpdk.), but there are more than
> > > 65536 meters in the hardware, such as Mellanox ConnectX-6.
> > >
> > > This patch use cmap to manage the meter, instead of the array.
> > >
> > > * Insertion performance, ovs-ofctl add-meter 1000+ meters,
> > >   the cmap takes abount 4000ms, as same as previous implementation.
> > > * Lookup performance in datapath, we add 1000+ meter which rate is
> > >   10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
> > >   packets.), and a flow which only forwarding the packets from p0
> > >   to p1, with meter action[2]. On other server, the pktgen-dpdk
> > >   will generate 64B packets to p0.
> > >   The forwarding performance is 4,814,400 pps. Without this path,
> > >   4,935,584 pps. There are about 1% performance loss. For addressing
> > >   this issue, next patch add a meter cache.
> > >
> > > [1].
> > > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> > > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
> > >
> > > [2].
> > > $ in_port=p0 action=meter:100,output:p1
> > >
> > > Cc: Ben Pfaff <blp@ovn.org>
> > > Cc: Jarno Rajahalme <jarno@ovn.org>
> > > Cc: Ilya Maximets <i.maximets@ovn.org>
> > > Cc: Andy Zhou <azhou@ovn.org>
> > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > ---
> > >  lib/dpif-netdev.c | 199 +++++++++++++++++++++++++++++++++++-----------------
> > > --
> > >  1 file changed, 130 insertions(+), 69 deletions(-)
> > >
> > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d393aab..5474d52
> > > 100644
> > > --- a/lib/dpif-netdev.c
> > > +++ b/lib/dpif-netdev.c
> > > @@ -98,9 +98,11 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t,
> > > recirc_depth, 0)
> > >
> > >  /* Configuration parameters. */
> > >  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table.
> > > */
> > > -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> > > -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> > > -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> > > +
> > > +/* Maximum number of meters in the table. */ #define METER_ENTRY_MAX
> > > (1
> > > +<< 19)
> > > +/* Maximum number of bands / meter. */
> > > +#define METER_BAND_MAX (8)
> > >
> > >  COVERAGE_DEFINE(datapath_drop_meter);
> > >  COVERAGE_DEFINE(datapath_drop_upcall_error);
> > > @@ -280,6 +282,9 @@ struct dp_meter_band {  };
> > >
> > >  struct dp_meter {
> > > +    struct cmap_node node;
> > > +    struct ovs_mutex lock;
> > > +    uint32_t id;
> > >      uint16_t flags;
> > >      uint16_t n_bands;
> > >      uint32_t max_delta_t;
> > > @@ -289,6 +294,12 @@ struct dp_meter {
> > >      struct dp_meter_band bands[];
> > >  };
> > >
> > > +struct dp_netdev_meter {
> > > +    struct cmap table OVS_GUARDED;
> > > +    struct ovs_mutex lock;  /* Used for meter table. */
> > > +    uint32_t hash_basis;
> > > +};
> > > +
> > >  struct pmd_auto_lb {
> > >      bool auto_lb_requested;     /* Auto load balancing requested by user. */
> > >      bool is_enabled;            /* Current status of Auto load balancing. */
> > > @@ -329,8 +340,7 @@ struct dp_netdev {
> > >      atomic_uint32_t tx_flush_interval;
> > >
> > >      /* Meters. */
> > > -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> > > -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> > > +    struct dp_netdev_meter *meter;
> > >
> > >      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
> > >      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> > > @@ -378,19 +388,6 @@ struct dp_netdev {
> > >      struct pmd_auto_lb pmd_alb;
> > >  };
> > >
> > > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> > > -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > > -{
> > > -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > > -}
> > > -
> > > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> > > -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > > -{
> > > -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > > -}
> > > -
> > > -
> > >  static struct dp_netdev_port *dp_netdev_lookup_port(const struct
> > > dp_netdev *dp,
> > >                                                      odp_port_t)
> > >      OVS_REQUIRES(dp->port_mutex);
> > > @@ -1523,6 +1520,84 @@ choose_port(struct dp_netdev *dp, const char
> > > *name)
> > >      return ODPP_NONE;
> > >  }
> > >
> > > +static uint32_t
> > > +dp_meter_hash(uint32_t meter_id, uint32_t basis) {
> > > +    return hash_bytes32(&meter_id, sizeof meter_id, basis); }
> > > +
> > > +static void
> > > +dp_netdev_meter_init(struct dp_netdev *dp) {
> > > +    struct dp_netdev_meter *dp_meter;
> > > +
> > > +    dp_meter = xmalloc(sizeof *dp_meter);
> > > +
> > > +    cmap_init(&dp_meter->table);
> > > +    ovs_mutex_init(&dp_meter->lock);
> > > +    dp_meter->hash_basis = random_uint32();
> > > +
> > > +    dp->meter = dp_meter;
> > > +}
> > > +
> > > +static void
> > > +dp_netdev_meter_destroy(struct dp_netdev *dp) {
> > > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > > +    struct dp_meter *meter;
> > > +
> > > +    ovs_mutex_lock(&dp_meter->lock);
> > > +    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
> > > +            cmap_remove(&dp_meter->table, &meter->node,
> > > +                        dp_meter_hash(meter->id,
> > > + dp_meter->hash_basis));
> > > +
> > > +            ovsrcu_postpone(free, meter);
> > > +    }
> > > +
> > > +    cmap_destroy(&dp_meter->table);
> > > +    ovs_mutex_unlock(&dp_meter->lock);
> > > +
> > > +    ovs_mutex_destroy(&dp_meter->lock);
> > > +    free(dp_meter);
> > > +}
> > > +
> > > +static struct dp_meter*
> > > +dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id) {
> > > +    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);
> > > +    struct dp_meter *meter;
> > > +
> > > +    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
> > > +        if (meter->id == meter_id) {
> > > +            return meter;
> > > +        }
> > > +    }
> > > +
> > > +    return NULL;
> > > +}
> > > +
> > > +static void
> > > +dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t
> > > meter_id)
> > > +                     OVS_REQUIRES(dp_meter->lock) {
> > > +    struct dp_meter *meter;
> > > +
> > > +    meter = dp_meter_lookup(dp_meter, meter_id);
> > > +    if (meter) {
> > > +            cmap_remove(&dp_meter->table, &meter->node,
> > > +                        dp_meter_hash(meter_id, dp_meter->hash_basis));
> > > +            ovsrcu_postpone(free, meter);
> > > +    }
> > > +}
> > > +
> > > +static void
> > > +dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter
> > > *meter)
> > > +                OVS_REQUIRES(dp_meter->lock) {
> > > +    cmap_insert(&dp_meter->table, &meter->node,
> > > +                dp_meter_hash(meter->id, dp_meter->hash_basis)); }
> > > +
> > >  static int
> > >  create_dp_netdev(const char *name, const struct dpif_class *class,
> > >                   struct dp_netdev **dpp) @@ -1556,9 +1631,8 @@
> > > create_dp_netdev(const char *name, const struct dpif_class *class,
> > >      dp->reconfigure_seq = seq_create();
> > >      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> > >
> > > -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> > > -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> > > -    }
> > > +    /* Init meter resource. */
> > > +    dp_netdev_meter_init(dp);
> > >
> > >      /* Disable upcalls by default. */
> > >      dp_netdev_disable_upcall(dp);
> > > @@ -1647,16 +1721,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev
> > > *dp)
> > >      fat_rwlock_destroy(&dp->upcall_rwlock);
> > >  }
> > >
> > > -static void
> > > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> > > -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > > -{
> > > -    if (dp->meters[meter_id]) {
> > > -        free(dp->meters[meter_id]);
> > > -        dp->meters[meter_id] = NULL;
> > > -    }
> > > -}
> > > -
> > >  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
> > >   * through the 'dp_netdevs' shash while freeing 'dp'. */  static void @@ -
> > > 1694,16 +1758,7 @@ dp_netdev_free(struct dp_netdev *dp)
> > >      /* Upcalls must be disabled at this point */
> > >      dp_netdev_destroy_upcall_lock(dp);
> > >
> > > -    int i;
> > > -
> > > -    for (i = 0; i < MAX_METERS; ++i) {
> > > -        meter_lock(dp, i);
> > > -        dp_delete_meter(dp, i);
> > > -        meter_unlock(dp, i);
> > > -    }
> > > -    for (i = 0; i < N_METER_LOCKS; ++i) {
> > > -        ovs_mutex_destroy(&dp->meter_locks[i]);
> > > -    }
> > > +    dp_netdev_meter_destroy(dp);
> > >
> > >      free(dp->pmd_cmask);
> > >      free(CONST_CAST(char *, dp->name)); @@ -5708,10 +5763,10 @@ static
> > > void  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
> > >                                 struct ofputil_meter_features *features)  {
> > > -    features->max_meters = MAX_METERS;
> > > +    features->max_meters = METER_ENTRY_MAX;
> > >      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
> > >      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> > > -    features->max_bands = MAX_BANDS;
> > > +    features->max_bands = METER_BAND_MAX;
> > >      features->max_color = 0;
> > >  }
> > >
> > > @@ -5732,14 +5787,13 @@ dp_netdev_run_meter(struct dp_netdev *dp,
> > > struct dp_packet_batch *packets_,
> > >      uint32_t exceeded_rate[NETDEV_MAX_BURST];
> > >      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
> > >
> > > -    if (meter_id >= MAX_METERS) {
> > > +    if (meter_id >= METER_ENTRY_MAX) {
> > >          return;
> > >      }
> > >
> > > -    meter_lock(dp, meter_id);
> > > -    meter = dp->meters[meter_id];
> > > +    meter = dp_meter_lookup(dp->meter, meter_id);
> > >      if (!meter) {
> > > -        goto out;
> > > +        return;
> > >      }
> > >
> > >      /* Initialize as negative values. */ @@ -5747,6 +5801,7 @@
> > > dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch
> > > *packets_,
> > >      /* Initialize as zeroes. */
> > >      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
> > >
> > > +    ovs_mutex_lock(&meter->lock);
> > >      /* All packets will hit the meter at the same time. */
> > >      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
> > >
> > > @@ -5860,8 +5915,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct
> > > dp_packet_batch *packets_,
> > >              dp_packet_batch_refill(packets_, packet, j);
> > >          }
> > >      }
> > > - out:
> > > -    meter_unlock(dp, meter_id);
> > > +
> > > +    ovs_mutex_unlock(&meter->lock);
> > >  }
> > >
> > >  /* Meter set/get/del processing is still single-threaded. */ @@ -5870,11
> > > +5925,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id
> > > meter_id,
> > >                        struct ofputil_meter_config *config)  {
> > >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > >      uint32_t mid = meter_id.uint32;
> > >      struct dp_meter *meter;
> > >      int i;
> > >
> > > -    if (mid >= MAX_METERS) {
> > > +    if (mid >= METER_ENTRY_MAX) {
> > >          return EFBIG; /* Meter_id out of range. */
> > >      }
> > >
> > > @@ -5882,7 +5938,7 @@ dpif_netdev_meter_set(struct dpif *dpif,
> > > ofproto_meter_id meter_id,
> > >          return EBADF; /* Unsupported flags set */
> > >      }
> > >
> > > -    if (config->n_bands > MAX_BANDS) {
> > > +    if (config->n_bands > METER_BAND_MAX) {
> > >          return EINVAL;
> > >      }
> > >
> > > @@ -5903,6 +5959,8 @@ dpif_netdev_meter_set(struct dpif *dpif,
> > > ofproto_meter_id meter_id,
> > >      meter->n_bands = config->n_bands;
> > >      meter->max_delta_t = 0;
> > >      meter->used = time_usec();
> > > +    meter->id = mid;
> > > +    ovs_mutex_init(&meter->lock);
> > >
> > >      /* set up bands */
> > >      for (i = 0; i < config->n_bands; ++i) { @@ -5928,10 +5986,12 @@
> > > dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> > >          }
> > >      }
> > >
> > > -    meter_lock(dp, mid);
> > > -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> > > -    dp->meters[mid] = meter;
> > > -    meter_unlock(dp, mid);
> > > +    ovs_mutex_lock(&dp_meter->lock);
> > > +
> > > +    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any */
> > > +    dp_meter_attach(dp_meter, meter);
> > > +
> > > +    ovs_mutex_unlock(&dp_meter->lock);
> > >
> > >      return 0;
> > >  }
> > > @@ -5941,22 +6001,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> > >                        ofproto_meter_id meter_id_,
> > >                        struct ofputil_meter_stats *stats, uint16_t n_bands)  {
> > > -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> > > +    struct dp_netdev *dp = get_dp_netdev(dpif);
> > >      uint32_t meter_id = meter_id_.uint32;
> > > -    int retval = 0;
> > > +    const struct dp_meter *meter;
> > >
> > > -    if (meter_id >= MAX_METERS) {
> > > +    if (meter_id >= METER_ENTRY_MAX) {
> > >          return EFBIG;
> > >      }
> > >
> > > -    meter_lock(dp, meter_id);
> > > -    const struct dp_meter *meter = dp->meters[meter_id];
> > > +    meter = dp_meter_lookup(dp->meter, meter_id);
> > >      if (!meter) {
> > > -        retval = ENOENT;
> > > -        goto done;
> > > +        return ENOENT;
> > >      }
> > > +
> > >      if (stats) {
> > > -        int i = 0;
> > > +        int i;
> > > +
> > > +        ovs_mutex_lock(&meter->lock);
> > >
> > >          stats->packet_in_count = meter->packet_count;
> > >          stats->byte_in_count = meter->byte_count; @@ -5966,12 +6027,11 @@
> > > dpif_netdev_meter_get(const struct dpif *dpif,
> > >              stats->bands[i].byte_count = meter->bands[i].byte_count;
> > >          }
> > >
> > > +        ovs_mutex_unlock(&meter->lock);
> > >          stats->n_bands = i;
> > >      }
> > >
> > > -done:
> > > -    meter_unlock(dp, meter_id);
> > > -    return retval;
> > > +    return 0;
> > >  }
> > >
> > >  static int
> > > @@ -5980,15 +6040,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
> > >                        struct ofputil_meter_stats *stats, uint16_t n_bands)  {
> > >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > >      int error;
> > >
> > >      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
> > >      if (!error) {
> > >          uint32_t meter_id = meter_id_.uint32;
> > >
> > > -        meter_lock(dp, meter_id);
> > > -        dp_delete_meter(dp, meter_id);
> > > -        meter_unlock(dp, meter_id);
> > > +        ovs_mutex_lock(&dp_meter->lock);
> > > +        dp_meter_detach_free(dp_meter, meter_id);
> > > +        ovs_mutex_unlock(&dp_meter->lock);
> > >      }
> > >      return error;
> > >  }
> > > --
> > > 1.8.3.1
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
>
>
>
> --
> Thanks,
> Tonghao
Ben Pfaff March 24, 2020, 5:11 a.m. UTC | #4
On Tue, Mar 24, 2020 at 09:25:41AM +0800, Tonghao Zhang wrote:
> On Mon, Mar 16, 2020 at 12:06 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> >
> > On Mon, Mar 16, 2020 at 11:07 AM Yanqin Wei <Yanqin.Wei@arm.com> wrote:
> > >
> > > Hi Xiangxia,
> > >
> > > The meter id is allocated by id_pool,  which can always return the lowest available id.
> > Yes, I added 70000+ meters, and it works fine.
> > > There is a light scalable data struct which supports direct address lookup. It can achieve several times performance improvement than cmap_find. And it has not copy memory when expanding.
> > > https://patchwork.ozlabs.org/patch/1253447/
> > >
> > > I suggest using this light data struct for >65535 meter instance. Would you like to take a look at it?
> > Yes, and I also wait Ilya to review, and offer a suggestion.
> Hi maintainers,
> Will you have a plan to help me to review those patches, thanks.

I know that I skipped over this because I was waiting for v3 using
idpool.
Tonghao Zhang March 24, 2020, 12:40 p.m. UTC | #5
On Tue, Mar 24, 2020 at 1:11 PM Ben Pfaff <blp@ovn.org> wrote:
>
> On Tue, Mar 24, 2020 at 09:25:41AM +0800, Tonghao Zhang wrote:
> > On Mon, Mar 16, 2020 at 12:06 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > >
> > > On Mon, Mar 16, 2020 at 11:07 AM Yanqin Wei <Yanqin.Wei@arm.com> wrote:
> > > >
> > > > Hi Xiangxia,
> > > >
> > > > The meter id is allocated by id_pool,  which can always return the lowest available id.
> > > Yes, I added 70000+ meters, and it works fine.
> > > > There is a light scalable data struct which supports direct address lookup. It can achieve several times performance improvement than cmap_find. And it has not copy memory when expanding.
> > > > https://patchwork.ozlabs.org/patch/1253447/
> > > >
> > > > I suggest using this light data struct for >65535 meter instance. Would you like to take a look at it?
> > > Yes, and I also wait Ilya to review, and offer a suggestion.
> > Hi maintainers,
> > Will you have a plan to help me to review those patches, thanks.
>
> I know that I skipped over this because I was waiting for v3 using
> idpool.
Hi Ben,
The meter id is allocated by id_pool in ofproto-dpif layer(meter_set
function use the id_pool_alloc_id to allocate it).
I guess what Yanqin  mean is that I should use the patches[1] he sent,
to compare the performance with cmap.
The patches of Yanqin are not in upstream, but I use the meter cache
to address the issue.

[1] - https://patchwork.ozlabs.org/patch/1253447/
Tonghao Zhang March 27, 2020, 9:52 a.m. UTC | #6
On Tue, Mar 24, 2020 at 8:40 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Tue, Mar 24, 2020 at 1:11 PM Ben Pfaff <blp@ovn.org> wrote:
> >
> > On Tue, Mar 24, 2020 at 09:25:41AM +0800, Tonghao Zhang wrote:
> > > On Mon, Mar 16, 2020 at 12:06 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
> > > >
> > > > On Mon, Mar 16, 2020 at 11:07 AM Yanqin Wei <Yanqin.Wei@arm.com> wrote:
> > > > >
> > > > > Hi Xiangxia,
> > > > >
> > > > > The meter id is allocated by id_pool,  which can always return the lowest available id.
> > > > Yes, I added 70000+ meters, and it works fine.
> > > > > There is a light scalable data struct which supports direct address lookup. It can achieve several times performance improvement than cmap_find. And it has not copy memory when expanding.
> > > > > https://patchwork.ozlabs.org/patch/1253447/
> > > > >
> > > > > I suggest using this light data struct for >65535 meter instance. Would you like to take a look at it?
> > > > Yes, and I also wait Ilya to review, and offer a suggestion.
> > > Hi maintainers,
> > > Will you have a plan to help me to review those patches, thanks.
> >
> > I know that I skipped over this because I was waiting for v3 using
> > idpool.
> Hi Ben,
> The meter id is allocated by id_pool in ofproto-dpif layer(meter_set
> function use the id_pool_alloc_id to allocate it).
> I guess what Yanqin  mean is that I should use the patches[1] he sent,
> to compare the performance with cmap.
> The patches of Yanqin are not in upstream, but I use the meter cache
> to address the issue.
Hi Ben, Yanqin
Did I misunderstand ?
> [1] - https://patchwork.ozlabs.org/patch/1253447/
> --
> Best regards, Tonghao
Ilya Maximets April 26, 2021, 6:38 p.m. UTC | #7
On 3/15/20 7:43 AM, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> For now, ovs-vswitchd use the array of the dp_meter struct
> to store meter's data, and at most, there are only 65536
> (defined by MAX_METERS) meters that can be used. But in some
> case, for example, in the edge gateway, we should use 200,000+,
> at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx
> path[1]. In other way, ovs-vswitchd should support meter-offload
> (rte_mtr_xxx api introduced by dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
> 
> This patch use cmap to manage the meter, instead of the array.

Hi.  Here is a around of review as discussed in review of v3 of this series.

It's a bit confusing that there was two versions of v2 of this patch set.
Some comments might be same as I already had for another v2...

> 
> * Insertion performance, ovs-ofctl add-meter 1000+ meters,
>   the cmap takes abount 4000ms, as same as previous implementation.
> * Lookup performance in datapath, we add 1000+ meter which rate is
>   10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
>   packets.), and a flow which only forwarding the packets from p0
>   to p1, with meter action[2]. On other server, the pktgen-dpdk
>   will generate 64B packets to p0.
>   The forwarding performance is 4,814,400 pps. Without this path,
>   4,935,584 pps. There are about 1% performance loss. For addressing
>   this issue, next patch add a meter cache.

To get back some of the performance you may consider switching form
mutex to spin lock for meters themselves.  (Note that fallback for
systems that has no spin locks will be needed in this case, so you
might want to keep meter_lock/unclock functions.)

Also, this test case looks very synthetic and in a more real-world
setup perfromance diference should be lower, so, likely, doesn't
worth efforts to create caches and stuff.

> 
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
> 
> [2].
> $ in_port=p0 action=meter:100,output:p1
> 
> Cc: Ben Pfaff <blp@ovn.org>
> Cc: Jarno Rajahalme <jarno@ovn.org>
> Cc: Ilya Maximets <i.maximets@ovn.org>
> Cc: Andy Zhou <azhou@ovn.org>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> --->  lib/dpif-netdev.c | 199 +++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 130 insertions(+), 69 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index d393aab..5474d52 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -98,9 +98,11 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
>  
>  /* Configuration parameters. */
>  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> +
> +/* Maximum number of meters in the table. */
> +#define METER_ENTRY_MAX (1 << 19)
> +/* Maximum number of bands / meter. */
> +#define METER_BAND_MAX (8)

It's unclear why enums converted to macros and why we need to change names.
Please, keep them as is.

>  
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -280,6 +282,9 @@ struct dp_meter_band {
>  };
>  
>  struct dp_meter {
> +    struct cmap_node node;
> +    struct ovs_mutex lock;
> +    uint32_t id;
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> @@ -289,6 +294,12 @@ struct dp_meter {
>      struct dp_meter_band bands[];
>  };
>  
> +struct dp_netdev_meter {
> +    struct cmap table OVS_GUARDED;
> +    struct ovs_mutex lock;  /* Used for meter table. */
> +    uint32_t hash_basis;

Not sure if we need a hash basis, especially because you're choosing
it randomly.  Basis for hash functions used to choose a beeter hashing
schema if you know that some particular base value will provide better
hash distribution for particular type of data, or if you need ot hash
two equal values and you want them to be hashed differently.  Both cases
are not applicable here, so, we can simply use 0 always as a hash basis.
Also hashing might be a bit faster if basis is a compile-time constant.

> +};
> +
>  struct pmd_auto_lb {
>      bool auto_lb_requested;     /* Auto load balancing requested by user. */
>      bool is_enabled;            /* Current status of Auto load balancing. */
> @@ -329,8 +340,7 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>  
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct dp_netdev_meter *meter;


We're  not going to create more than one instance of this structure
per datapath, so , I think, to reduce naming confusion, it's better
to just expand it right here:

       struct cmap meters OVS_GUARDED;
       struct ovs_mutex meters_lock;

>  
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -378,19 +388,6 @@ struct dp_netdev {
>      struct pmd_auto_lb pmd_alb;
>  };
>  
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1523,6 +1520,84 @@ choose_port(struct dp_netdev *dp, const char *name)
>      return ODPP_NONE;
>  }
>  
> +static uint32_t
> +dp_meter_hash(uint32_t meter_id, uint32_t basis)
> +{
> +    return hash_bytes32(&meter_id, sizeof meter_id, basis);

Instead of hash_bytes32 it might be better to use simple hash_int().
Benefits: no need for the function at all, callers might just use
hash_int() with zero basis (basis is likely not needed as I explained
above).  Also, more compilers will fully inline hash_int() unlike
hash_bytes32().


> +}
> +
> +static void
> +dp_netdev_meter_init(struct dp_netdev *dp)

Without the memory allocation and calculation of a basis this
function could be fully inlined into create_dp_netdev().

> +{
> +    struct dp_netdev_meter *dp_meter;
> +
> +    dp_meter = xmalloc(sizeof *dp_meter);
> +
> +    cmap_init(&dp_meter->table);
> +    ovs_mutex_init(&dp_meter->lock);
> +    dp_meter->hash_basis = random_uint32();

Random is not better than 0.

> +
> +    dp->meter = dp_meter;
> +}
> +
> +static void
> +dp_netdev_meter_destroy(struct dp_netdev *dp)
> +{
> +    struct dp_netdev_meter *dp_meter = dp->meter;
> +    struct dp_meter *meter;
> +
> +    ovs_mutex_lock(&dp_meter->lock);
> +    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
> +            cmap_remove(&dp_meter->table, &meter->node,
> +                        dp_meter_hash(meter->id, dp_meter->hash_basis));
> +
> +            ovsrcu_postpone(free, meter);

Overindented.  4 spaces, please.

> +    }
> +
> +    cmap_destroy(&dp_meter->table);
> +    ovs_mutex_unlock(&dp_meter->lock);
> +
> +    ovs_mutex_destroy(&dp_meter->lock);
> +    free(dp_meter);
> +}
> +
> +static struct dp_meter*
> +dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id)
> +{
> +    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);

hash_int(meter_id, 0);

> +    struct dp_meter *meter;
> +
> +    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
> +        if (meter->id == meter_id) {
> +            return meter;
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t meter_id)
> +                     OVS_REQUIRES(dp_meter->lock)

Overindented.

> +{
> +    struct dp_meter *meter;
> +
> +    meter = dp_meter_lookup(dp_meter, meter_id);
> +    if (meter) {
> +            cmap_remove(&dp_meter->table, &meter->node,
> +                        dp_meter_hash(meter_id, dp_meter->hash_basis));
> +            ovsrcu_postpone(free, meter);

ditto.

> +    }
> +}
> +
> +static void
> +dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter *meter)
> +                OVS_REQUIRES(dp_meter->lock)

ditto.

> +{
> +    cmap_insert(&dp_meter->table, &meter->node,
> +                dp_meter_hash(meter->id, dp_meter->hash_basis));> +}
> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp)
> @@ -1556,9 +1631,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>  
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    /* Init meter resource. */
> +    dp_netdev_meter_init(dp);
>  
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1647,16 +1721,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>  
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
>   * through the 'dp_netdevs' shash while freeing 'dp'. */
>  static void
> @@ -1694,16 +1758,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>  
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_destroy(dp);
>  
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -5708,10 +5763,10 @@ static void
>  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
>                                 struct ofputil_meter_features *features)
>  {
> -    features->max_meters = MAX_METERS;
> +    features->max_meters = METER_ENTRY_MAX;
>      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
>      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> -    features->max_bands = MAX_BANDS;
> +    features->max_bands = METER_BAND_MAX;
>      features->max_color = 0;
>  }
>  
> @@ -5732,14 +5787,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      uint32_t exceeded_rate[NETDEV_MAX_BURST];
>      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(dp->meter, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>  
>      /* Initialize as negative values. */
> @@ -5747,6 +5801,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>  
> +    ovs_mutex_lock(&meter->lock);
>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>  
> @@ -5860,8 +5915,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>  
>  /* Meter set/get/del processing is still single-threaded. */
> @@ -5870,11 +5925,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>                        struct ofputil_meter_config *config)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev_meter *dp_meter = dp->meter;
>      uint32_t mid = meter_id.uint32;
>      struct dp_meter *meter;
>      int i;
>  
> -    if (mid >= MAX_METERS) {
> +    if (mid >= METER_ENTRY_MAX) {
>          return EFBIG; /* Meter_id out of range. */
>      }
>  
> @@ -5882,7 +5938,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          return EBADF; /* Unsupported flags set */
>      }
>  
> -    if (config->n_bands > MAX_BANDS) {
> +    if (config->n_bands > METER_BAND_MAX) {
>          return EINVAL;
>      }
>  
> @@ -5903,6 +5959,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>  
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) {
> @@ -5928,10 +5986,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
>          }
>      }
>  
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&dp_meter->lock);
> +
> +    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any */
> +    dp_meter_attach(dp_meter, meter);
> +
> +    ovs_mutex_unlock(&dp_meter->lock);
>  
>      return 0;
>  }
> @@ -5941,22 +6001,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return EFBIG;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(dp->meter, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
> -        int i = 0;
> +        int i;

This change is not necessary.

> +
> +        ovs_mutex_lock(&meter->lock);
>  
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count;
> @@ -5966,12 +6027,11 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
>  
> +        ovs_mutex_unlock(&meter->lock);
>          stats->n_bands = i;
>      }
>  
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>  
>  static int
> @@ -5980,15 +6040,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev_meter *dp_meter = dp->meter;
>      int error;
>  
>      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>  
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&dp_meter->lock);
> +        dp_meter_detach_free(dp_meter, meter_id);
> +        ovs_mutex_unlock(&dp_meter->lock);
>      }
>      return error;
>  }
>
Tonghao Zhang May 12, 2021, 9:19 a.m. UTC | #8
On Tue, Apr 27, 2021 at 2:38 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 3/15/20 7:43 AM, xiangxia.m.yue@gmail.com wrote:
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > For now, ovs-vswitchd use the array of the dp_meter struct
> > to store meter's data, and at most, there are only 65536
> > (defined by MAX_METERS) meters that can be used. But in some
> > case, for example, in the edge gateway, we should use 200,000+,
> > at least, meters for IP address bandwidth limitation.
> > Every one IP address will use two meters for its rx and tx
> > path[1]. In other way, ovs-vswitchd should support meter-offload
> > (rte_mtr_xxx api introduced by dpdk.), but there are more than
> > 65536 meters in the hardware, such as Mellanox ConnectX-6.
> >
> > This patch use cmap to manage the meter, instead of the array.
>
> Hi.  Here is a around of review as discussed in review of v3 of this series.
>
> It's a bit confusing that there was two versions of v2 of this patch set.
> Some comments might be same as I already had for another v2...
>
> >
> > * Insertion performance, ovs-ofctl add-meter 1000+ meters,
> >   the cmap takes abount 4000ms, as same as previous implementation.
> > * Lookup performance in datapath, we add 1000+ meter which rate is
> >   10G (the NIC cards are 10Gb, so netdev-datapath will not drop the
> >   packets.), and a flow which only forwarding the packets from p0
> >   to p1, with meter action[2]. On other server, the pktgen-dpdk
> >   will generate 64B packets to p0.
> >   The forwarding performance is 4,814,400 pps. Without this path,
> >   4,935,584 pps. There are about 1% performance loss. For addressing
> >   this issue, next patch add a meter cache.
>
> To get back some of the performance you may consider switching form
> mutex to spin lock for meters themselves.  (Note that fallback for
> systems that has no spin locks will be needed in this case, so you
> might want to keep meter_lock/unclock functions.)
>
> Also, this test case looks very synthetic and in a more real-world
> setup perfromance diference should be lower, so, likely, doesn't
> worth efforts to create caches and stuff.
>
> >
> > [1].
> > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
> >
> > [2].
> > $ in_port=p0 action=meter:100,output:p1
> >
> > Cc: Ben Pfaff <blp@ovn.org>
> > Cc: Jarno Rajahalme <jarno@ovn.org>
> > Cc: Ilya Maximets <i.maximets@ovn.org>
> > Cc: Andy Zhou <azhou@ovn.org>
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > --->  lib/dpif-netdev.c | 199 +++++++++++++++++++++++++++++++++++-------------------
> >  1 file changed, 130 insertions(+), 69 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index d393aab..5474d52 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -98,9 +98,11 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
> >
> >  /* Configuration parameters. */
> >  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> > -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> > -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> > -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> > +
> > +/* Maximum number of meters in the table. */
> > +#define METER_ENTRY_MAX (1 << 19)
> > +/* Maximum number of bands / meter. */
> > +#define METER_BAND_MAX (8)
>
> It's unclear why enums converted to macros and why we need to change names.
> Please, keep them as is.
Hi Ilya
v3 is posted, please review.
http://patchwork.ozlabs.org/project/openvswitch/patch/20210512091736.48172-1-xiangxia.m.yue@gmail.com/
> >
> >  COVERAGE_DEFINE(datapath_drop_meter);
> >  COVERAGE_DEFINE(datapath_drop_upcall_error);
> > @@ -280,6 +282,9 @@ struct dp_meter_band {
> >  };
> >
> >  struct dp_meter {
> > +    struct cmap_node node;
> > +    struct ovs_mutex lock;
> > +    uint32_t id;
> >      uint16_t flags;
> >      uint16_t n_bands;
> >      uint32_t max_delta_t;
> > @@ -289,6 +294,12 @@ struct dp_meter {
> >      struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_netdev_meter {
> > +    struct cmap table OVS_GUARDED;
> > +    struct ovs_mutex lock;  /* Used for meter table. */
> > +    uint32_t hash_basis;
>
> Not sure if we need a hash basis, especially because you're choosing
> it randomly.  Basis for hash functions used to choose a beeter hashing
> schema if you know that some particular base value will provide better
> hash distribution for particular type of data, or if you need ot hash
> two equal values and you want them to be hashed differently.  Both cases
> are not applicable here, so, we can simply use 0 always as a hash basis.
> Also hashing might be a bit faster if basis is a compile-time constant.
>
> > +};
> > +
> >  struct pmd_auto_lb {
> >      bool auto_lb_requested;     /* Auto load balancing requested by user. */
> >      bool is_enabled;            /* Current status of Auto load balancing. */
> > @@ -329,8 +340,7 @@ struct dp_netdev {
> >      atomic_uint32_t tx_flush_interval;
> >
> >      /* Meters. */
> > -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> > -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> > +    struct dp_netdev_meter *meter;
>
>
> We're  not going to create more than one instance of this structure
> per datapath, so , I think, to reduce naming confusion, it's better
> to just expand it right here:
>
>        struct cmap meters OVS_GUARDED;
>        struct ovs_mutex meters_lock;
>
> >
> >      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
> >      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> > @@ -378,19 +388,6 @@ struct dp_netdev {
> >      struct pmd_auto_lb pmd_alb;
> >  };
> >
> > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -
> >  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
> >                                                      odp_port_t)
> >      OVS_REQUIRES(dp->port_mutex);
> > @@ -1523,6 +1520,84 @@ choose_port(struct dp_netdev *dp, const char *name)
> >      return ODPP_NONE;
> >  }
> >
> > +static uint32_t
> > +dp_meter_hash(uint32_t meter_id, uint32_t basis)
> > +{
> > +    return hash_bytes32(&meter_id, sizeof meter_id, basis);
>
> Instead of hash_bytes32 it might be better to use simple hash_int().
> Benefits: no need for the function at all, callers might just use
> hash_int() with zero basis (basis is likely not needed as I explained
> above).  Also, more compilers will fully inline hash_int() unlike
> hash_bytes32().
>
>
> > +}
> > +
> > +static void
> > +dp_netdev_meter_init(struct dp_netdev *dp)
>
> Without the memory allocation and calculation of a basis this
> function could be fully inlined into create_dp_netdev().
>
> > +{
> > +    struct dp_netdev_meter *dp_meter;
> > +
> > +    dp_meter = xmalloc(sizeof *dp_meter);
> > +
> > +    cmap_init(&dp_meter->table);
> > +    ovs_mutex_init(&dp_meter->lock);
> > +    dp_meter->hash_basis = random_uint32();
>
> Random is not better than 0.
>
> > +
> > +    dp->meter = dp_meter;
> > +}
> > +
> > +static void
> > +dp_netdev_meter_destroy(struct dp_netdev *dp)
> > +{
> > +    struct dp_netdev_meter *dp_meter = dp->meter;
> > +    struct dp_meter *meter;
> > +
> > +    ovs_mutex_lock(&dp_meter->lock);
> > +    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
> > +            cmap_remove(&dp_meter->table, &meter->node,
> > +                        dp_meter_hash(meter->id, dp_meter->hash_basis));
> > +
> > +            ovsrcu_postpone(free, meter);
>
> Overindented.  4 spaces, please.
>
> > +    }
> > +
> > +    cmap_destroy(&dp_meter->table);
> > +    ovs_mutex_unlock(&dp_meter->lock);
> > +
> > +    ovs_mutex_destroy(&dp_meter->lock);
> > +    free(dp_meter);
> > +}
> > +
> > +static struct dp_meter*
> > +dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id)
> > +{
> > +    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);
>
> hash_int(meter_id, 0);
>
> > +    struct dp_meter *meter;
> > +
> > +    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
> > +        if (meter->id == meter_id) {
> > +            return meter;
> > +        }
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t meter_id)
> > +                     OVS_REQUIRES(dp_meter->lock)
>
> Overindented.
>
> > +{
> > +    struct dp_meter *meter;
> > +
> > +    meter = dp_meter_lookup(dp_meter, meter_id);
> > +    if (meter) {
> > +            cmap_remove(&dp_meter->table, &meter->node,
> > +                        dp_meter_hash(meter_id, dp_meter->hash_basis));
> > +            ovsrcu_postpone(free, meter);
>
> ditto.
>
> > +    }
> > +}
> > +
> > +static void
> > +dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter *meter)
> > +                OVS_REQUIRES(dp_meter->lock)
>
> ditto.
>
> > +{
> > +    cmap_insert(&dp_meter->table, &meter->node,
> > +                dp_meter_hash(meter->id, dp_meter->hash_basis));> +}
> > +
> >  static int
> >  create_dp_netdev(const char *name, const struct dpif_class *class,
> >                   struct dp_netdev **dpp)
> > @@ -1556,9 +1631,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
> >      dp->reconfigure_seq = seq_create();
> >      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> >
> > -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> > -    }
> > +    /* Init meter resource. */
> > +    dp_netdev_meter_init(dp);
> >
> >      /* Disable upcalls by default. */
> >      dp_netdev_disable_upcall(dp);
> > @@ -1647,16 +1721,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
> >      fat_rwlock_destroy(&dp->upcall_rwlock);
> >  }
> >
> > -static void
> > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    if (dp->meters[meter_id]) {
> > -        free(dp->meters[meter_id]);
> > -        dp->meters[meter_id] = NULL;
> > -    }
> > -}
> > -
> >  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
> >   * through the 'dp_netdevs' shash while freeing 'dp'. */
> >  static void
> > @@ -1694,16 +1758,7 @@ dp_netdev_free(struct dp_netdev *dp)
> >      /* Upcalls must be disabled at this point */
> >      dp_netdev_destroy_upcall_lock(dp);
> >
> > -    int i;
> > -
> > -    for (i = 0; i < MAX_METERS; ++i) {
> > -        meter_lock(dp, i);
> > -        dp_delete_meter(dp, i);
> > -        meter_unlock(dp, i);
> > -    }
> > -    for (i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_destroy(&dp->meter_locks[i]);
> > -    }
> > +    dp_netdev_meter_destroy(dp);
> >
> >      free(dp->pmd_cmask);
> >      free(CONST_CAST(char *, dp->name));
> > @@ -5708,10 +5763,10 @@ static void
> >  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
> >                                 struct ofputil_meter_features *features)
> >  {
> > -    features->max_meters = MAX_METERS;
> > +    features->max_meters = METER_ENTRY_MAX;
> >      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
> >      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> > -    features->max_bands = MAX_BANDS;
> > +    features->max_bands = METER_BAND_MAX;
> >      features->max_color = 0;
> >  }
> >
> > @@ -5732,14 +5787,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
> >      uint32_t exceeded_rate[NETDEV_MAX_BURST];
> >      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(dp->meter, meter_id);
> >      if (!meter) {
> > -        goto out;
> > +        return;
> >      }
> >
> >      /* Initialize as negative values. */
> > @@ -5747,6 +5801,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
> >      /* Initialize as zeroes. */
> >      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
> >
> > +    ovs_mutex_lock(&meter->lock);
> >      /* All packets will hit the meter at the same time. */
> >      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
> >
> > @@ -5860,8 +5915,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
> >              dp_packet_batch_refill(packets_, packet, j);
> >          }
> >      }
> > - out:
> > -    meter_unlock(dp, meter_id);
> > +
> > +    ovs_mutex_unlock(&meter->lock);
> >  }
> >
> >  /* Meter set/get/del processing is still single-threaded. */
> > @@ -5870,11 +5925,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >                        struct ofputil_meter_config *config)
> >  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_netdev_meter *dp_meter = dp->meter;
> >      uint32_t mid = meter_id.uint32;
> >      struct dp_meter *meter;
> >      int i;
> >
> > -    if (mid >= MAX_METERS) {
> > +    if (mid >= METER_ENTRY_MAX) {
> >          return EFBIG; /* Meter_id out of range. */
> >      }
> >
> > @@ -5882,7 +5938,7 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >          return EBADF; /* Unsupported flags set */
> >      }
> >
> > -    if (config->n_bands > MAX_BANDS) {
> > +    if (config->n_bands > METER_BAND_MAX) {
> >          return EINVAL;
> >      }
> >
> > @@ -5903,6 +5959,8 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >      meter->n_bands = config->n_bands;
> >      meter->max_delta_t = 0;
> >      meter->used = time_usec();
> > +    meter->id = mid;
> > +    ovs_mutex_init(&meter->lock);
> >
> >      /* set up bands */
> >      for (i = 0; i < config->n_bands; ++i) {
> > @@ -5928,10 +5986,12 @@ dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
> >          }
> >      }
> >
> > -    meter_lock(dp, mid);
> > -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> > -    dp->meters[mid] = meter;
> > -    meter_unlock(dp, mid);
> > +    ovs_mutex_lock(&dp_meter->lock);
> > +
> > +    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any */
> > +    dp_meter_attach(dp_meter, meter);
> > +
> > +    ovs_mutex_unlock(&dp_meter->lock);
> >
> >      return 0;
> >  }
> > @@ -5941,22 +6001,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >                        ofproto_meter_id meter_id_,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)
> >  {
> > -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_netdev *dp = get_dp_netdev(dpif);
> >      uint32_t meter_id = meter_id_.uint32;
> > -    int retval = 0;
> > +    const struct dp_meter *meter;
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return EFBIG;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    const struct dp_meter *meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(dp->meter, meter_id);
> >      if (!meter) {
> > -        retval = ENOENT;
> > -        goto done;
> > +        return ENOENT;
> >      }
> > +
> >      if (stats) {
> > -        int i = 0;
> > +        int i;
>
> This change is not necessary.
>
> > +
> > +        ovs_mutex_lock(&meter->lock);
> >
> >          stats->packet_in_count = meter->packet_count;
> >          stats->byte_in_count = meter->byte_count;
> > @@ -5966,12 +6027,11 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >              stats->bands[i].byte_count = meter->bands[i].byte_count;
> >          }
> >
> > +        ovs_mutex_unlock(&meter->lock);
> >          stats->n_bands = i;
> >      }
> >
> > -done:
> > -    meter_unlock(dp, meter_id);
> > -    return retval;
> > +    return 0;
> >  }
> >
> >  static int
> > @@ -5980,15 +6040,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)
> >  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_netdev_meter *dp_meter = dp->meter;
> >      int error;
> >
> >      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
> >      if (!error) {
> >          uint32_t meter_id = meter_id_.uint32;
> >
> > -        meter_lock(dp, meter_id);
> > -        dp_delete_meter(dp, meter_id);
> > -        meter_unlock(dp, meter_id);
> > +        ovs_mutex_lock(&dp_meter->lock);
> > +        dp_meter_detach_free(dp_meter, meter_id);
> > +        ovs_mutex_unlock(&dp_meter->lock);
> >      }
> >      return error;
> >  }
> >
>
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d393aab..5474d52 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -98,9 +98,11 @@  DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
-enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
-enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
+
+/* Maximum number of meters in the table. */
+#define METER_ENTRY_MAX (1 << 19)
+/* Maximum number of bands / meter. */
+#define METER_BAND_MAX (8)
 
 COVERAGE_DEFINE(datapath_drop_meter);
 COVERAGE_DEFINE(datapath_drop_upcall_error);
@@ -280,6 +282,9 @@  struct dp_meter_band {
 };
 
 struct dp_meter {
+    struct cmap_node node;
+    struct ovs_mutex lock;
+    uint32_t id;
     uint16_t flags;
     uint16_t n_bands;
     uint32_t max_delta_t;
@@ -289,6 +294,12 @@  struct dp_meter {
     struct dp_meter_band bands[];
 };
 
+struct dp_netdev_meter {
+    struct cmap table OVS_GUARDED;
+    struct ovs_mutex lock;  /* Used for meter table. */
+    uint32_t hash_basis;
+};
+
 struct pmd_auto_lb {
     bool auto_lb_requested;     /* Auto load balancing requested by user. */
     bool is_enabled;            /* Current status of Auto load balancing. */
@@ -329,8 +340,7 @@  struct dp_netdev {
     atomic_uint32_t tx_flush_interval;
 
     /* Meters. */
-    struct ovs_mutex meter_locks[N_METER_LOCKS];
-    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+    struct dp_netdev_meter *meter;
 
     /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
     OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
@@ -378,19 +388,6 @@  struct dp_netdev {
     struct pmd_auto_lb pmd_alb;
 };
 
-static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                     odp_port_t)
     OVS_REQUIRES(dp->port_mutex);
@@ -1523,6 +1520,84 @@  choose_port(struct dp_netdev *dp, const char *name)
     return ODPP_NONE;
 }
 
+static uint32_t
+dp_meter_hash(uint32_t meter_id, uint32_t basis)
+{
+    return hash_bytes32(&meter_id, sizeof meter_id, basis);
+}
+
+static void
+dp_netdev_meter_init(struct dp_netdev *dp)
+{
+    struct dp_netdev_meter *dp_meter;
+
+    dp_meter = xmalloc(sizeof *dp_meter);
+
+    cmap_init(&dp_meter->table);
+    ovs_mutex_init(&dp_meter->lock);
+    dp_meter->hash_basis = random_uint32();
+
+    dp->meter = dp_meter;
+}
+
+static void
+dp_netdev_meter_destroy(struct dp_netdev *dp)
+{
+    struct dp_netdev_meter *dp_meter = dp->meter;
+    struct dp_meter *meter;
+
+    ovs_mutex_lock(&dp_meter->lock);
+    CMAP_FOR_EACH (meter, node, &dp_meter->table) {
+            cmap_remove(&dp_meter->table, &meter->node,
+                        dp_meter_hash(meter->id, dp_meter->hash_basis));
+
+            ovsrcu_postpone(free, meter);
+    }
+
+    cmap_destroy(&dp_meter->table);
+    ovs_mutex_unlock(&dp_meter->lock);
+
+    ovs_mutex_destroy(&dp_meter->lock);
+    free(dp_meter);
+}
+
+static struct dp_meter*
+dp_meter_lookup(struct dp_netdev_meter *dp_meter, uint32_t meter_id)
+{
+    uint32_t hash = dp_meter_hash(meter_id, dp_meter->hash_basis);
+    struct dp_meter *meter;
+
+    CMAP_FOR_EACH_WITH_HASH (meter, node, hash, &dp_meter->table) {
+        if (meter->id == meter_id) {
+            return meter;
+        }
+    }
+
+    return NULL;
+}
+
+static void
+dp_meter_detach_free(struct dp_netdev_meter *dp_meter, uint32_t meter_id)
+                     OVS_REQUIRES(dp_meter->lock)
+{
+    struct dp_meter *meter;
+
+    meter = dp_meter_lookup(dp_meter, meter_id);
+    if (meter) {
+            cmap_remove(&dp_meter->table, &meter->node,
+                        dp_meter_hash(meter_id, dp_meter->hash_basis));
+            ovsrcu_postpone(free, meter);
+    }
+}
+
+static void
+dp_meter_attach(struct dp_netdev_meter *dp_meter, struct dp_meter *meter)
+                OVS_REQUIRES(dp_meter->lock)
+{
+    cmap_insert(&dp_meter->table, &meter->node,
+                dp_meter_hash(meter->id, dp_meter->hash_basis));
+}
+
 static int
 create_dp_netdev(const char *name, const struct dpif_class *class,
                  struct dp_netdev **dpp)
@@ -1556,9 +1631,8 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     dp->reconfigure_seq = seq_create();
     dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
 
-    for (int i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
-    }
+    /* Init meter resource. */
+    dp_netdev_meter_init(dp);
 
     /* Disable upcalls by default. */
     dp_netdev_disable_upcall(dp);
@@ -1647,16 +1721,6 @@  dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
     fat_rwlock_destroy(&dp->upcall_rwlock);
 }
 
-static void
-dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
-    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    if (dp->meters[meter_id]) {
-        free(dp->meters[meter_id]);
-        dp->meters[meter_id] = NULL;
-    }
-}
-
 /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
  * through the 'dp_netdevs' shash while freeing 'dp'. */
 static void
@@ -1694,16 +1758,7 @@  dp_netdev_free(struct dp_netdev *dp)
     /* Upcalls must be disabled at this point */
     dp_netdev_destroy_upcall_lock(dp);
 
-    int i;
-
-    for (i = 0; i < MAX_METERS; ++i) {
-        meter_lock(dp, i);
-        dp_delete_meter(dp, i);
-        meter_unlock(dp, i);
-    }
-    for (i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_destroy(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_destroy(dp);
 
     free(dp->pmd_cmask);
     free(CONST_CAST(char *, dp->name));
@@ -5708,10 +5763,10 @@  static void
 dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
                                struct ofputil_meter_features *features)
 {
-    features->max_meters = MAX_METERS;
+    features->max_meters = METER_ENTRY_MAX;
     features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
     features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
-    features->max_bands = MAX_BANDS;
+    features->max_bands = METER_BAND_MAX;
     features->max_color = 0;
 }
 
@@ -5732,14 +5787,13 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     uint32_t exceeded_rate[NETDEV_MAX_BURST];
     int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return;
     }
 
-    meter_lock(dp, meter_id);
-    meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(dp->meter, meter_id);
     if (!meter) {
-        goto out;
+        return;
     }
 
     /* Initialize as negative values. */
@@ -5747,6 +5801,7 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     /* Initialize as zeroes. */
     memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
 
+    ovs_mutex_lock(&meter->lock);
     /* All packets will hit the meter at the same time. */
     long_delta_t = now / 1000 - meter->used / 1000; /* msec */
 
@@ -5860,8 +5915,8 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
             dp_packet_batch_refill(packets_, packet, j);
         }
     }
- out:
-    meter_unlock(dp, meter_id);
+
+    ovs_mutex_unlock(&meter->lock);
 }
 
 /* Meter set/get/del processing is still single-threaded. */
@@ -5870,11 +5925,12 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
                       struct ofputil_meter_config *config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev_meter *dp_meter = dp->meter;
     uint32_t mid = meter_id.uint32;
     struct dp_meter *meter;
     int i;
 
-    if (mid >= MAX_METERS) {
+    if (mid >= METER_ENTRY_MAX) {
         return EFBIG; /* Meter_id out of range. */
     }
 
@@ -5882,7 +5938,7 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         return EBADF; /* Unsupported flags set */
     }
 
-    if (config->n_bands > MAX_BANDS) {
+    if (config->n_bands > METER_BAND_MAX) {
         return EINVAL;
     }
 
@@ -5903,6 +5959,8 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
     meter->n_bands = config->n_bands;
     meter->max_delta_t = 0;
     meter->used = time_usec();
+    meter->id = mid;
+    ovs_mutex_init(&meter->lock);
 
     /* set up bands */
     for (i = 0; i < config->n_bands; ++i) {
@@ -5928,10 +5986,12 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         }
     }
 
-    meter_lock(dp, mid);
-    dp_delete_meter(dp, mid); /* Free existing meter, if any */
-    dp->meters[mid] = meter;
-    meter_unlock(dp, mid);
+    ovs_mutex_lock(&dp_meter->lock);
+
+    dp_meter_detach_free(dp_meter, mid); /* Free existing meter, if any */
+    dp_meter_attach(dp_meter, meter);
+
+    ovs_mutex_unlock(&dp_meter->lock);
 
     return 0;
 }
@@ -5941,22 +6001,23 @@  dpif_netdev_meter_get(const struct dpif *dpif,
                       ofproto_meter_id meter_id_,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
-    const struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev *dp = get_dp_netdev(dpif);
     uint32_t meter_id = meter_id_.uint32;
-    int retval = 0;
+    const struct dp_meter *meter;
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return EFBIG;
     }
 
-    meter_lock(dp, meter_id);
-    const struct dp_meter *meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(dp->meter, meter_id);
     if (!meter) {
-        retval = ENOENT;
-        goto done;
+        return ENOENT;
     }
+
     if (stats) {
-        int i = 0;
+        int i;
+
+        ovs_mutex_lock(&meter->lock);
 
         stats->packet_in_count = meter->packet_count;
         stats->byte_in_count = meter->byte_count;
@@ -5966,12 +6027,11 @@  dpif_netdev_meter_get(const struct dpif *dpif,
             stats->bands[i].byte_count = meter->bands[i].byte_count;
         }
 
+        ovs_mutex_unlock(&meter->lock);
         stats->n_bands = i;
     }
 
-done:
-    meter_unlock(dp, meter_id);
-    return retval;
+    return 0;
 }
 
 static int
@@ -5980,15 +6040,16 @@  dpif_netdev_meter_del(struct dpif *dpif,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev_meter *dp_meter = dp->meter;
     int error;
 
     error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
     if (!error) {
         uint32_t meter_id = meter_id_.uint32;
 
-        meter_lock(dp, meter_id);
-        dp_delete_meter(dp, meter_id);
-        meter_unlock(dp, meter_id);
+        ovs_mutex_lock(&dp_meter->lock);
+        dp_meter_detach_free(dp_meter, meter_id);
+        ovs_mutex_unlock(&dp_meter->lock);
     }
     return error;
 }