diff mbox series

[ovs-dev,ovs-dev,v3,1/4] dpif-netdev: Expand the meters supported number.

Message ID 20200523103320.47497-2-xiangxia.m.yue@gmail.com
State New
Headers show
Series expand the meter table and fix bug. | expand

Commit Message

Tonghao Zhang May 23, 2020, 10:33 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

For now, ovs-vswitchd use the array of the dp_meter struct
to store meter's data, and at most, there are only 65536
(defined by MAX_METERS) meters that can be used. But in some
case, for example, in the edge gateway, we should use 200,000,
at least, meters for IP address bandwidth limitation.
Every one IP address will use two meters for its rx and tx
path[1]. In other way, ovs-vswitchd should support meter-offload
(rte_mtr_xxx api introduced by dpdk.), but there are more than
65536 meters in the hardware, such as Mellanox ConnectX-6.

This patch use array to manage the meter, but it can ben expanded.

[1].
$ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
$ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0

Cc: Ilya Maximets <i.maximets@ovn.org>
Cc: William Tu <u9012063@gmail.com>
Cc: Jarno Rajahalme <jarno@ovn.org>
Cc: Ben Pfaff <blp@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Cc: Pravin Shelar <pshelar@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
v3:
* rename n_meters -> n_allocated in dp_meter_instance
* rename count -> n_used in dp_meter_table
* rename "ti" -> "meter_inst" or "inst" in different functions/struction
* remove parenthesize for sizeof
* rename dp_netdev_meter_destroy/init to dp_netdev_meter_table_destroy/init
* fix OVS_REQUIRES style
v2:
* add comments for dp_meter_instance
* change the log
* remove extra newline
* I don't move the dp_netdev_meter_init/destroy up. because
  them depends other meters function and put all meter function
  together may make the codes clean.
---
 lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 250 insertions(+), 69 deletions(-)
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 51c888501bdf..920fef3ec572 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -99,9 +99,12 @@  DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 
 /* Configuration parameters. */
 enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
-enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
-enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
+
+/* Maximum number of meters in the table. */
+#define METER_ENTRY_MAX (200000ULL)
+/* Maximum number of bands / meter. */
+#define METER_BAND_MAX  (8)
+#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
 
 COVERAGE_DEFINE(datapath_drop_meter);
 COVERAGE_DEFINE(datapath_drop_upcall_error);
@@ -284,12 +287,26 @@  struct dp_meter {
     uint16_t flags;
     uint16_t n_bands;
     uint32_t max_delta_t;
+    uint32_t id;
+    struct ovs_mutex lock;
     uint64_t used;
     uint64_t packet_count;
     uint64_t byte_count;
     struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+    uint32_t n_allocated;
+    /* Followed by struct dp_meter[n]; where n is the n_allocated. */
+    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
+};
+
+struct dp_meter_table {
+    OVSRCU_TYPE(struct dp_meter_instance *) meter_inst;
+    uint32_t n_used;
+    struct ovs_mutex lock;
+};
+
 struct pmd_auto_lb {
     bool auto_lb_requested;     /* Auto load balancing requested by user. */
     bool is_enabled;            /* Current status of Auto load balancing. */
@@ -330,8 +347,7 @@  struct dp_netdev {
     atomic_uint32_t tx_flush_interval;
 
     /* Meters. */
-    struct ovs_mutex meter_locks[N_METER_LOCKS];
-    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+    struct dp_meter_table meter_tbl;
 
     /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
     OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
@@ -379,19 +395,6 @@  struct dp_netdev {
     struct pmd_auto_lb pmd_alb;
 };
 
-static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
-    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
-}
-
-
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                     odp_port_t)
     OVS_REQUIRES(dp->port_mutex);
@@ -1524,6 +1527,9 @@  choose_port(struct dp_netdev *dp, const char *name)
     return ODPP_NONE;
 }
 
+static void dp_netdev_meter_table_init(struct dp_meter_table *tbl);
+static void dp_netdev_meter_table_destroy(struct dp_meter_table *tbl);
+
 static int
 create_dp_netdev(const char *name, const struct dpif_class *class,
                  struct dp_netdev **dpp)
@@ -1557,9 +1563,7 @@  create_dp_netdev(const char *name, const struct dpif_class *class,
     dp->reconfigure_seq = seq_create();
     dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
 
-    for (int i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_table_init(&dp->meter_tbl);
 
     /* Disable upcalls by default. */
     dp_netdev_disable_upcall(dp);
@@ -1648,16 +1652,6 @@  dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
     fat_rwlock_destroy(&dp->upcall_rwlock);
 }
 
-static void
-dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
-    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
-{
-    if (dp->meters[meter_id]) {
-        free(dp->meters[meter_id]);
-        dp->meters[meter_id] = NULL;
-    }
-}
-
 /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
  * through the 'dp_netdevs' shash while freeing 'dp'. */
 static void
@@ -1695,16 +1689,7 @@  dp_netdev_free(struct dp_netdev *dp)
     /* Upcalls must be disabled at this point */
     dp_netdev_destroy_upcall_lock(dp);
 
-    int i;
-
-    for (i = 0; i < MAX_METERS; ++i) {
-        meter_lock(dp, i);
-        dp_delete_meter(dp, i);
-        meter_unlock(dp, i);
-    }
-    for (i = 0; i < N_METER_LOCKS; ++i) {
-        ovs_mutex_destroy(&dp->meter_locks[i]);
-    }
+    dp_netdev_meter_table_destroy(&dp->meter_tbl);
 
     free(dp->pmd_cmask);
     free(CONST_CAST(char *, dp->name));
@@ -5714,14 +5699,197 @@  dp_netdev_disable_upcall(struct dp_netdev *dp)
 
 
 /* Meters */
+static uint32_t
+meter_hash(struct dp_meter_instance *inst, uint32_t id)
+{
+    uint32_t n_allocated = inst->n_allocated;
+
+    return id % n_allocated;
+}
+
+static void
+dp_meter_free(struct dp_meter *meter)
+{
+    ovs_mutex_destroy(&meter->lock);
+    free(meter);
+}
+
+static struct dp_meter_instance *
+dp_meter_instance_alloc(const uint32_t size)
+{
+    struct dp_meter_instance *inst;
+
+    inst = xzalloc(sizeof *inst + sizeof(struct dp_meter *) * size);
+    inst->n_allocated = size;
+
+    return inst;
+}
+
+static void
+dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
+{
+    struct dp_meter_instance *new_inst;
+    struct dp_meter_instance *inst;
+    int n_meters;
+    int i;
+
+    new_inst = dp_meter_instance_alloc(size);
+
+    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    n_meters = MIN(size, inst->n_allocated);
+
+    for (i = 0; i < n_meters; i++) {
+        if (ovsrcu_get(struct dp_meter *, &inst->dp_meters[i])) {
+            new_inst->dp_meters[i] = inst->dp_meters[i];
+        }
+    }
+
+    ovsrcu_set(&tbl->meter_inst, new_inst);
+    ovsrcu_postpone(free, inst);
+}
+
+static void
+dp_meter_instance_insert(struct dp_meter_instance *inst,
+                         struct dp_meter *meter)
+{
+    uint32_t hash;
+
+    hash = meter_hash(inst, meter->id);
+    ovsrcu_set(&inst->dp_meters[hash], meter);
+}
+
+static void
+dp_meter_instance_remove(struct dp_meter_instance *inst,
+                         struct dp_meter *meter)
+{
+    uint32_t hash;
+
+    hash = meter_hash(inst, meter->id);
+    ovsrcu_set(&inst->dp_meters[hash], NULL);
+}
+
+static void
+dp_netdev_meter_table_init(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *inst;
+
+    inst = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+    ovsrcu_set(&tbl->meter_inst, inst);
+
+    ovs_mutex_init(&tbl->lock);
+    tbl->n_used = 0;
+}
+
+static void
+dp_netdev_meter_table_destroy(struct dp_meter_table *tbl)
+{
+    struct dp_meter_instance *inst;
+    int i;
+
+    inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    for (i = 0; i < inst->n_allocated; i++) {
+        struct dp_meter *meter;
+
+        meter = ovsrcu_get(struct dp_meter *, &inst->dp_meters[i]);
+        if (meter) {
+            ovsrcu_postpone(dp_meter_free, meter);
+        }
+    }
+
+    ovsrcu_postpone(free, inst);
+    ovs_mutex_destroy(&tbl->lock);
+}
+
+static struct dp_meter *
+dp_meter_lookup(struct dp_meter_table *tbl, uint32_t meter_id)
+{
+    struct dp_meter_instance *meter_inst;
+    struct dp_meter *meter;
+    uint32_t hash;
+
+    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    hash = meter_hash(meter_inst, meter_id);
+
+    meter = ovsrcu_get(struct dp_meter *, &meter_inst->dp_meters[hash]);
+    if (meter && meter->id == meter_id) {
+        return meter;
+    }
+
+    return NULL;
+}
+
+static void
+dp_meter_detach_free(struct dp_meter_table *tbl, uint32_t meter_id)
+    OVS_REQUIRES(tbl->lock)
+{
+    struct dp_meter_instance *meter_inst;
+    struct dp_meter *meter;
+
+    meter = dp_meter_lookup(tbl, meter_id);
+    if (!meter) {
+        return;
+    }
+
+    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    dp_meter_instance_remove(meter_inst, meter);
+    ovsrcu_postpone(dp_meter_free, meter);
+
+    tbl->n_used--;
+    /* Shrink the meter array if necessary. */
+    if (meter_inst->n_allocated > DP_METER_ARRAY_SIZE_MIN &&
+        tbl->n_used <= (meter_inst->n_allocated / 4)) {
+        int half_size = meter_inst->n_allocated / 2;
+        int i;
+
+        /* Avoid hash collision, don't move slots to other place.
+         * Make sure there are no references of meters in array
+         * which will be released.
+         */
+        for (i = half_size; i < meter_inst->n_allocated; i++) {
+            if (ovsrcu_get(struct dp_meter *, &meter_inst->dp_meters[i])) {
+                return;
+            }
+        }
+
+        dp_meter_instance_realloc(tbl, half_size);
+    }
+}
+
+static int
+dp_meter_attach(struct dp_meter_table *tbl, struct dp_meter *meter)
+    OVS_REQUIRES(tbl->lock)
+{
+    struct dp_meter_instance *meter_inst;
+    uint32_t hash;
+
+    meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst);
+    hash = meter_hash(meter_inst, meter->id);
+
+    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
+                                &meter_inst->dp_meters[hash]))) {
+        VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n",
+                  meter->id, hash, meter_inst->n_allocated);
+        return EBUSY;
+    }
+
+    dp_meter_instance_insert(meter_inst, meter);
+
+    tbl->n_used++;
+    if (tbl->n_used >= meter_inst->n_allocated) {
+        dp_meter_instance_realloc(tbl, meter_inst->n_allocated * 2);
+    }
+
+    return 0;
+}
+
 static void
 dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
                                struct ofputil_meter_features *features)
 {
-    features->max_meters = MAX_METERS;
+    features->max_meters = METER_ENTRY_MAX;
     features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
     features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
-    features->max_bands = MAX_BANDS;
+    features->max_bands = METER_BAND_MAX;
     features->max_color = 0;
 }
 
@@ -5743,14 +5911,13 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     uint32_t exceeded_rate[NETDEV_MAX_BURST];
     int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return;
     }
 
-    meter_lock(dp, meter_id);
-    meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        goto out;
+        return;
     }
 
     /* Initialize as negative values. */
@@ -5758,6 +5925,7 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
     /* Initialize as zeroes. */
     memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
 
+    ovs_mutex_lock(&meter->lock);
     /* All packets will hit the meter at the same time. */
     long_delta_t = now / 1000 - meter->used / 1000; /* msec */
 
@@ -5875,8 +6043,8 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
             dp_packet_batch_refill(packets_, packet, j);
         }
     }
- out:
-    meter_unlock(dp, meter_id);
+
+    ovs_mutex_unlock(&meter->lock);
 }
 
 /* Meter set/get/del processing is still single-threaded. */
@@ -5885,11 +6053,12 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
                       struct ofputil_meter_config *config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     uint32_t mid = meter_id.uint32;
     struct dp_meter *meter;
-    int i;
+    int err, i;
 
-    if (mid >= MAX_METERS) {
+    if (mid >= METER_ENTRY_MAX) {
         return EFBIG; /* Meter_id out of range. */
     }
 
@@ -5897,7 +6066,7 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         return EBADF; /* Unsupported flags set */
     }
 
-    if (config->n_bands > MAX_BANDS) {
+    if (config->n_bands > METER_BAND_MAX) {
         return EINVAL;
     }
 
@@ -5918,6 +6087,8 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
     meter->n_bands = config->n_bands;
     meter->max_delta_t = 0;
     meter->used = time_usec();
+    meter->id = mid;
+    ovs_mutex_init(&meter->lock);
 
     /* set up bands */
     for (i = 0; i < config->n_bands; ++i) {
@@ -5943,12 +6114,22 @@  dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         }
     }
 
-    meter_lock(dp, mid);
-    dp_delete_meter(dp, mid); /* Free existing meter, if any */
-    dp->meters[mid] = meter;
-    meter_unlock(dp, mid);
+    ovs_mutex_lock(&meter_tbl->lock);
+
+    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
+    err = dp_meter_attach(meter_tbl, meter);
+    if (err) {
+        goto unlock_out;
+    }
+
+    ovs_mutex_unlock(&meter_tbl->lock);
 
     return 0;
+
+unlock_out:
+    ovs_mutex_unlock(&meter_tbl->lock);
+    dp_meter_free(meter);
+    return err;
 }
 
 static int
@@ -5956,23 +6137,23 @@  dpif_netdev_meter_get(const struct dpif *dpif,
                       ofproto_meter_id meter_id_,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
-    const struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev *dp = get_dp_netdev(dpif);
     uint32_t meter_id = meter_id_.uint32;
-    int retval = 0;
+    const struct dp_meter *meter;
 
-    if (meter_id >= MAX_METERS) {
+    if (meter_id >= METER_ENTRY_MAX) {
         return EFBIG;
     }
 
-    meter_lock(dp, meter_id);
-    const struct dp_meter *meter = dp->meters[meter_id];
+    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
     if (!meter) {
-        retval = ENOENT;
-        goto done;
+        return ENOENT;
     }
+
     if (stats) {
         int i = 0;
 
+        ovs_mutex_lock(&meter->lock);
         stats->packet_in_count = meter->packet_count;
         stats->byte_in_count = meter->byte_count;
 
@@ -5980,13 +6161,12 @@  dpif_netdev_meter_get(const struct dpif *dpif,
             stats->bands[i].packet_count = meter->bands[i].packet_count;
             stats->bands[i].byte_count = meter->bands[i].byte_count;
         }
+        ovs_mutex_unlock(&meter->lock);
 
         stats->n_bands = i;
     }
 
-done:
-    meter_unlock(dp, meter_id);
-    return retval;
+    return 0;
 }
 
 static int
@@ -5995,15 +6175,16 @@  dpif_netdev_meter_del(struct dpif *dpif,
                       struct ofputil_meter_stats *stats, uint16_t n_bands)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
     int error;
 
     error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
     if (!error) {
         uint32_t meter_id = meter_id_.uint32;
 
-        meter_lock(dp, meter_id);
-        dp_delete_meter(dp, meter_id);
-        meter_unlock(dp, meter_id);
+        ovs_mutex_lock(&meter_tbl->lock);
+        dp_meter_detach_free(meter_tbl, meter_id);
+        ovs_mutex_unlock(&meter_tbl->lock);
     }
     return error;
 }