diff mbox series

[ovs-dev,PATCH/RFC,1/7] dpif-netlink: add support for offload of meters

Message ID 20211110162858.20101-2-simon.horman@corigine.com
State RFC
Headers show
Series Allow offload of OpenFlow Meters via TC | expand

Commit Message

Simon Horman Nov. 10, 2021, 4:28 p.m. UTC
From: Baowen Zheng <baowen.zheng@corigine.com>

Allow hardware offload of meters via OVS-TC by reflecting meter
configuration into the TC datapath as TC police actions whose
lifecycle is independent of flows.

A follow-up patch will make use of such TC policer action instances
by referring to them by index in TC flower classifiers (flows added to
the TC datapath) that use OF meter actions.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Tianyu Yuan <tianyu.yuan@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
---
 lib/dpif-netdev.c          |   2 +-
 lib/dpif-netlink.c         | 114 +++++++++++++++++++++++++++++++++++--
 lib/dpif-provider.h        |   2 +-
 lib/dpif.c                 |   4 +-
 lib/dpif.h                 |   2 +-
 lib/netdev-linux.c         |   8 +--
 lib/tc.c                   |  13 +++++
 lib/tc.h                   |   7 +++
 ofproto/ofproto-dpif.c     |   4 +-
 ofproto/ofproto-provider.h |   2 +-
 ofproto/ofproto.c          |   3 +-
 11 files changed, 143 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 98453a206..bf4de3cf4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6490,7 +6490,7 @@  dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_,
 /* Meter set/get/del processing is still single-threaded. */
 static int
 dpif_netdev_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
-                      struct ofputil_meter_config *config)
+                      bool add OVS_UNUSED, struct ofputil_meter_config *config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     uint32_t mid = meter_id.uint32;
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 18cdfe6e5..3c42db0fa 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -25,6 +25,7 @@ 
 #include <net/if.h>
 #include <linux/types.h>
 #include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
 #include <poll.h>
 #include <stdlib.h>
 #include <strings.h>
@@ -60,6 +61,7 @@ 
 #include "packets.h"
 #include "random.h"
 #include "sset.h"
+#include "tc.h"
 #include "timeval.h"
 #include "unaligned.h"
 #include "util.h"
@@ -273,6 +275,16 @@  static int dpif_netlink_vport_from_ofpbuf(struct dpif_netlink_vport *,
 static int dpif_netlink_port_query__(const struct dpif_netlink *dpif,
                                      odp_port_t port_no, const char *port_name,
                                      struct dpif_port *dpif_port);
+unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
+void tc_put_rtab(struct ofpbuf *msg, uint16_t type,
+                 const struct tc_ratespec *rate);
+void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
+static void dpif_netlink_police_start_nested(struct ofpbuf *request, int *prio,
+                                             size_t *total_offset,
+                                             size_t *basic_offset);
+static void dpif_netlink_police_end_nested(struct ofpbuf *request,
+                                           size_t *total_offset,
+                                           size_t *basic_offset);
 
 static int
 create_nl_sock(struct dpif_netlink *dpif OVS_UNUSED, struct nl_sock **sockp)
@@ -3959,6 +3971,23 @@  dpif_netlink_ct_timeout_policy_dump_done(struct dpif *dpif OVS_UNUSED,
  * zero.  Check for that condition and disable meters on those kernels. */
 static bool probe_broken_meters(struct dpif *);
 
+/* Opens the nested netlink attributes for one "police" action in 'request'.
+ *
+ * Starts a TCA_ACT_TAB container, then an entry whose attribute type is the
+ * pre-incremented value of '*prio' (so '*prio' is modified), and puts
+ * TCA_KIND = "police" into that entry.  The offsets of the two open nests
+ * are stored in '*total_offset' (outer) and '*basic_offset' (inner) so that
+ * dpif_netlink_police_end_nested() can close them. */
+static void
+dpif_netlink_police_start_nested(struct ofpbuf *request, int *prio,
+                           size_t *total_offset, size_t *basic_offset)
+{
+    *total_offset = nl_msg_start_nested(request, TCA_ACT_TAB);
+    *basic_offset = nl_msg_start_nested(request, ++*prio);
+    nl_msg_put_string(request, TCA_KIND, "police");
+}
+
+/* Closes the two nested attributes in 'request' whose offsets were recorded
+ * in '*basic_offset' (inner) and '*total_offset' (outer), innermost first. */
+static void
+dpif_netlink_police_end_nested(struct ofpbuf *request, size_t *total_offset,
+                               size_t *basic_offset)
+{
+    nl_msg_end_nested(request, *basic_offset);
+    nl_msg_end_nested(request, *total_offset);
+}
+
 static void
 dpif_netlink_meter_init(struct dpif_netlink *dpif, struct ofpbuf *buf,
                         void *stub, size_t size, uint32_t command)
@@ -4067,9 +4096,80 @@  dpif_netlink_meter_get_features(const struct dpif *dpif_,
     ofpbuf_delete(msg);
 }
 
+/* Installs or updates the TC police action that mirrors meter 'meter_id'.
+ *
+ * When 'add' is true the action is created with NLM_F_EXCL; otherwise the
+ * request carries NLM_F_REPLACE.  Exactly one band is supported.  With
+ * OFPMF13_KBPS in 'config->flags' the band is translated to a byte rate,
+ * with OFPMF13_PKTPS to a packet rate.  The police action is configured
+ * with TC_POLICE_SHOT.
+ *
+ * Returns 0 on success, otherwise EINVAL for an unsupported band count,
+ * ENODEV if the request cannot be built, or the error reported by
+ * tc_transact(). */
+static int
+dpif_netlink_meter_add_police(ofproto_meter_id meter_id,
+                              struct ofputil_meter_config *config,
+                              bool add)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
+    unsigned int pkt_burst_ticks = 0, pps_rate = 0;
+    struct ofputil_meter_band *band;
+    struct tc_police tc_police;
+    struct ofpbuf request;
+    struct tcamsg *tcmsg;
+    size_t total_offset;
+    size_t basic_offset;
+    size_t act_offset;
+    int mtu = 65535;
+    int prio = 0;
+    int error;
+
+    /* Validate before building the request so that no error path has to
+     * release the netlink buffer, and so that 'bands[0]' is only touched
+     * when it exists. */
+    if (!config->n_bands) {
+        VLOG_ERR_RL(&rl, "TC offloading requires a meter band");
+        return EINVAL;
+    }
+    if (config->n_bands > 1) {
+        VLOG_ERR_RL(&rl, "TC offloading does not support more than one band");
+        return EINVAL;
+    }
+    band = &config->bands[0];
+
+    tcmsg = tc_act_make_request(RTM_NEWACTION,
+                                (add ? NLM_F_EXCL : NLM_F_REPLACE)
+                                | NLM_F_CREATE, &request);
+    if (!tcmsg) {
+        return ENODEV;
+    }
+
+    memset(&tc_police, 0, sizeof tc_police);
+    dpif_netlink_police_start_nested(&request, &prio, &total_offset,
+                                     &act_offset);
+    tc_police.action = TC_POLICE_SHOT;
+    tc_police.mtu = mtu;
+    tc_police.index = METER_ID_TO_POLICY_INDEX(meter_id.uint32);
+
+    if (config->flags & OFPMF13_KBPS) {
+        uint64_t burst_bytes;
+
+        /* Widen before scaling so that the kbps-to-bytes/s conversion
+         * cannot wrap in 32-bit arithmetic for multi-gigabit rates. */
+        tc_fill_rate(&tc_police.rate, (uint64_t) band->rate * 1000 / 8, mtu);
+        /* Without an explicit burst size, default to 1/40 of the TC rate. */
+        burst_bytes = band->burst_size
+                      ? (uint64_t) band->burst_size * 1000 / 8
+                      : tc_police.rate.rate / 40;
+        tc_police.burst = tc_bytes_to_ticks(
+            tc_police.rate.rate,
+            burst_bytes > UINT32_MAX ? UINT32_MAX
+                                     : (unsigned int) burst_bytes);
+    } else {
+        /* Packet-based meter: default burst is 1/5 of the packet rate. */
+        pps_rate = band->rate;
+        pkt_burst_ticks = tc_bytes_to_ticks(pps_rate, band->burst_size
+                                            ? band->burst_size
+                                            : pps_rate / 5);
+    }
+    basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+    nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof tc_police);
+    if (config->flags & OFPMF13_KBPS) {
+        tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate);
+    } else if (config->flags & OFPMF13_PKTPS) {
+        nl_msg_put_u64(&request, TCA_POLICE_PKTRATE64, (uint64_t) pps_rate);
+        nl_msg_put_u64(&request, TCA_POLICE_PKTBURST64,
+                       (uint64_t) pkt_burst_ticks);
+    }
+    nl_msg_end_nested(&request, basic_offset);
+    dpif_netlink_police_end_nested(&request, &total_offset, &act_offset);
+    error = tc_transact(&request, NULL);
+    if (error) {
+        /* Log the id passed by the caller; 'config->meter_id' is documented
+         * as ignored by the meter_set provider interface. */
+        VLOG_ERR_RL(&rl, "failed to send netlink msg for meterid %u error "
+                    "%d", meter_id.uint32, error);
+        return error;
+    }
+
+    return 0;
+}
+
 static int
 dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
-                         struct ofputil_meter_config *config)
+                         bool add, struct ofputil_meter_config *config)
 {
     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
     struct ofpbuf buf, *msg;
@@ -4093,6 +4193,10 @@  dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
         }
     }
 
+    if (netdev_is_flow_api_enabled()) {
+        dpif_netlink_meter_add_police(meter_id, config, add);
+    }
+
     dpif_netlink_meter_init(dpif, &buf, stub, sizeof stub, OVS_METER_CMD_SET);
 
     nl_msg_put_u32(&buf, OVS_METER_ATTR_ID, meter_id.uint32);
@@ -4146,13 +4250,13 @@  dpif_netlink_meter_set__(struct dpif *dpif_, ofproto_meter_id meter_id,
 
 static int
 dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
-                       struct ofputil_meter_config *config)
+                       bool add, struct ofputil_meter_config *config)
 {
     if (probe_broken_meters(dpif_)) {
         return ENOMEM;
     }
 
-    return dpif_netlink_meter_set__(dpif_, meter_id, config);
+    return dpif_netlink_meter_set__(dpif_, meter_id, add, config);
 }
 
 /* Retrieve statistics and/or delete meter 'meter_id'.  Statistics are
@@ -4270,8 +4374,8 @@  probe_broken_meters__(struct dpif *dpif)
     /* Try adding two meters and make sure that they both come back with
      * the proper meter id.  Use the "__" version so that we don't cause
      * a recurve deadlock. */
-    dpif_netlink_meter_set__(dpif, id1, &config1);
-    dpif_netlink_meter_set__(dpif, id2, &config2);
+    dpif_netlink_meter_set__(dpif, id1, true, &config1);
+    dpif_netlink_meter_set__(dpif, id2, true, &config2);
 
     if (dpif_netlink_meter_get(dpif, id1, NULL, 0)
         || dpif_netlink_meter_get(dpif, id2, NULL, 0)) {
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 27e3a7658..0d092de9b 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -608,7 +608,7 @@  struct dpif_class {
      *
      * The meter id specified through 'config->meter_id' is ignored. */
     int (*meter_set)(struct dpif *, ofproto_meter_id meter_id,
-                     struct ofputil_meter_config *);
+                     bool add, struct ofputil_meter_config *);
 
     /* Queries 'dpif' for meter stats with the given 'meter_id'.  Stores
      * maximum of 'n_bands' meter statistics, returning the number of band
diff --git a/lib/dpif.c b/lib/dpif.c
index 38bcb47cb..069f17863 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1928,7 +1928,7 @@  dpif_meter_get_features(const struct dpif *dpif,
  * The meter id specified through 'config->meter_id' is ignored. */
 int
 dpif_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
-               struct ofputil_meter_config *config)
+               bool add, struct ofputil_meter_config *config)
 {
     COVERAGE_INC(dpif_meter_set);
 
@@ -1950,7 +1950,7 @@  dpif_meter_set(struct dpif *dpif, ofproto_meter_id meter_id,
         }
     }
 
-    int error = dpif->dpif_class->meter_set(dpif, meter_id, config);
+    int error = dpif->dpif_class->meter_set(dpif, meter_id, add, config);
     if (!error) {
         VLOG_DBG_RL(&dpmsg_rl, "%s: DPIF meter %"PRIu32" set",
                     dpif_name(dpif), meter_id.uint32);
diff --git a/lib/dpif.h b/lib/dpif.h
index 8febfb9f6..4398a4077 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -891,7 +891,7 @@  void dpif_print_packet(struct dpif *, struct dpif_upcall *);
 void dpif_meter_get_features(const struct dpif *,
                              struct ofputil_meter_features *);
 int dpif_meter_set(struct dpif *, ofproto_meter_id meter_id,
-                   struct ofputil_meter_config *);
+                   bool add, struct ofputil_meter_config *);
 int dpif_meter_get(const struct dpif *, ofproto_meter_id meter_id,
                    struct ofputil_meter_stats *, uint16_t n_bands);
 int dpif_meter_del(struct dpif *, ofproto_meter_id meter_id,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 97bd21be4..5ace3ca02 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -477,7 +477,7 @@  static const struct tc_ops *const tcs[] = {
 };
 
 static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks);
-static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
+unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
 static unsigned int tc_buffer_per_jiffy(unsigned int rate);
 static uint32_t tc_time_to_ticks(uint32_t time);
 
@@ -506,7 +506,7 @@  static int tc_query_qdisc(const struct netdev *netdev);
 void
 tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate);
 static int tc_calc_cell_log(unsigned int mtu);
-static void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
+void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
 static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes);
 
 
@@ -5746,7 +5746,7 @@  tc_ticks_to_bytes(unsigned int rate, unsigned int ticks)
 
 /* Returns the number of ticks that it would take to transmit 'size' bytes at a
  * rate of 'rate' bytes per second. */
-static unsigned int
+unsigned int
 tc_bytes_to_ticks(unsigned int rate, unsigned int size)
 {
     read_psched();
@@ -6106,7 +6106,7 @@  tc_calc_cell_log(unsigned int mtu)
 
 /* Initializes 'rate' properly for a rate of 'Bps' bytes per second with an MTU
  * of 'mtu'. */
-static void
+void
 tc_fill_rate(struct tc_ratespec *rate, uint64_t Bps, int mtu)
 {
     memset(rate, 0, sizeof *rate);
diff --git a/lib/tc.c b/lib/tc.c
index 38a1dfc0e..3d54cd28e 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -199,6 +199,19 @@  tc_make_request(int ifindex, int type, unsigned int flags,
     return tcmsg;
 }
 
+/* Initializes 'request' as a netlink message of the given 'type' with
+ * NLM_F_REQUEST | 'flags', followed by a zeroed struct tcamsg whose
+ * 'tca_family' is AF_UNSPEC.  Returns a pointer to that tcamsg inside
+ * 'request'.
+ *
+ * NOTE(review): 'request' is set up with ofpbuf_init() here; a caller that
+ * bails out before handing the buffer on presumably has to release it
+ * itself -- confirm against tc_transact(). */
+struct tcamsg *
+tc_act_make_request(int type, unsigned int flags, struct ofpbuf *request)
+{
+    struct tcamsg *tcamsg;
+
+    ofpbuf_init(request, 16384);
+    nl_msg_put_nlmsghdr(request, sizeof *tcamsg, type, NLM_F_REQUEST | flags);
+    tcamsg = ofpbuf_put_zeros(request, sizeof *tcamsg);
+    tcamsg->tca_family = AF_UNSPEC;
+
+    return tcamsg;
+}
+
 static void request_from_tcf_id(struct tcf_id *id, uint16_t eth_type,
                                 int type, unsigned int flags,
                                 struct ofpbuf *request)
diff --git a/lib/tc.h b/lib/tc.h
index a147ca461..2408a0e92 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -52,6 +52,11 @@  enum tc_flower_reserved_prio {
 };
 #define TC_RESERVED_PRIORITY_MAX (__TC_RESERVED_PRIORITY_MAX -1)
 
+/* Maps an OpenFlow meter id ('meter_id', e.g. meter_id.uint32) to a 32-bit
+ * TC police action index.  The top 8 bits are the fixed prefix 0xff that
+ * marks meter-related police actions, the following 16 bits carry
+ * 'meter_id' + 1, and the low 8 bits are reserved for bands in the future.
+ *
+ * The argument and the whole expansion are parenthesized so the result does
+ * not depend on surrounding operators, and the prefix is unsigned so that
+ * shifting it into the top byte does not overflow a signed int. */
+#define METER_ID_TO_POLICY_INDEX(meter_id) \
+    ((0xffU << 24) | (((meter_id) + 1) << 8))
+
 enum tc_qdisc_hook {
     TC_INGRESS,
     TC_EGRESS,
@@ -78,6 +83,8 @@  tc_get_minor(unsigned int handle)
     return TC_H_MIN(handle);
 }
 
+struct tcamsg *tc_act_make_request(int type, unsigned int flags,
+                                   struct ofpbuf *request);
 struct tcmsg *tc_make_request(int ifindex, int type,
                               unsigned int flags, struct ofpbuf *);
 int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index cba49a99e..5b89f5909 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -6688,7 +6688,7 @@  meter_get_features(const struct ofproto *ofproto_,
 
 static enum ofperr
 meter_set(struct ofproto *ofproto_, ofproto_meter_id *meter_id,
-          struct ofputil_meter_config *config)
+          bool add, struct ofputil_meter_config *config)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
 
@@ -6703,7 +6703,7 @@  meter_set(struct ofproto *ofproto_, ofproto_meter_id *meter_id,
         }
     }
 
-    switch (dpif_meter_set(ofproto->backer->dpif, *meter_id, config)) {
+    switch (dpif_meter_set(ofproto->backer->dpif, *meter_id, add, config)) {
     case 0:
         return 0;
     case EFBIG: /* meter_id out of range */
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 57c7d17cb..5338971b2 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -1836,7 +1836,7 @@  struct ofproto_class {
      * leaving '*id' unchanged.  On failure, the existing meter configuration
      * is left intact. */
     enum ofperr (*meter_set)(struct ofproto *ofproto, ofproto_meter_id *id,
-                             struct ofputil_meter_config *config);
+                             bool add, struct ofputil_meter_config *config);
 
     /* Gets the meter and meter band packet and byte counts for maximum of
      * 'n_bands' bands for the meter with provider ID 'id' within 'ofproto'.
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index bd6103b1c..614bcec7c 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -6790,7 +6790,7 @@  handle_add_meter(struct ofproto *ofproto, struct ofputil_meter_mod *mm)
     }
 
     error = ofproto->ofproto_class->meter_set(ofproto, &provider_meter_id,
-                                              &mm->meter);
+                                              true, &mm->meter);
     if (!error) {
         ovs_assert(provider_meter_id.uint32 != UINT32_MAX);
         meter = meter_create(&mm->meter, provider_meter_id);
@@ -6813,6 +6813,7 @@  handle_modify_meter(struct ofproto *ofproto, struct ofputil_meter_mod *mm)
     provider_meter_id = meter->provider_meter_id.uint32;
     error = ofproto->ofproto_class->meter_set(ofproto,
                                               &meter->provider_meter_id,
+                                              false,
                                               &mm->meter);
     ovs_assert(meter->provider_meter_id.uint32 == provider_meter_id);
     if (!error) {