[ovs-dev,RFC,v1] lib/tc: add ingress ratelimiting support for tc-offload

Message ID 1549036711-13773-1-git-send-email-pieter.jansenvanvuuren@netronome.com
State New
Headers show
Series
  • [ovs-dev,RFC,v1] lib/tc: add ingress ratelimiting support for tc-offload
Related show

Commit Message

Pieter Jansen van Vuuren Feb. 1, 2019, 3:58 p.m.
Firstly, this patch introduces the notion of reserved priorities, as the
filter implementing ingress policing requires the highest priority.
Secondly, it allows rate limiters to be set while tc-offload is enabled.
Lastly, when an ingress rate limiter is configured, it installs a matchall
filter that matches all traffic and applies a police action to it.

An example of what to expect:

OvS CLI:
ovs-vsctl set interface <netdev_name> ingress_policing_rate=5000
ovs-vsctl set interface <netdev_name> ingress_policing_burst=100

Resulting TC filter:
filter protocol ip pref 1 matchall chain 0
filter protocol ip pref 1 matchall chain 0 handle 0x1
  not_in_hw
        action order 1:  police 0x1 rate 5Mbit burst 125Kb mtu 64Kb
          action drop/continue overhead 0b
        ref 1 bind 1 installed 3 sec used 3 sec
        Action statistics:
        Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
        backlog 0b 0p requeues 0

MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
10.0.0.200 () port 0 AF_INET : demo
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

131072  16384  16384    60.13       4.49

ovs-vsctl list interface <netdev_name>
_uuid               : 2ca774e8-8b95-430f-a2c2-f8f742613ab1
admin_state         : up
...
ingress_policing_burst: 100
ingress_policing_rate: 5000
...
type                : ""

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 include/linux/pkt_cls.h  |  12 ++++
 lib/netdev-linux.c       | 131 +++++++++++++++++++++++++++++++++++----
 lib/netdev-tc-offloads.c |   2 +-
 lib/tc.c                 |   4 ++
 lib/tc.h                 |   7 +++
 5 files changed, 144 insertions(+), 12 deletions(-)

Patch

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 1384d71f9..4adea59e7 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -238,6 +238,18 @@  enum {
 	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
 };
 
+/* Match-all classifier */
+
+enum {
+	TCA_MATCHALL_UNSPEC,
+	TCA_MATCHALL_CLASSID,
+	TCA_MATCHALL_ACT,
+	TCA_MATCHALL_FLAGS,
+	__TCA_MATCHALL_MAX,
+};
+
+#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1)
+
 #endif /* __KERNEL__ || !HAVE_TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST */
 
 #endif /* __LINUX_PKT_CLS_WRAPPER_H */
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 25d037cb6..92cfb229d 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -113,6 +113,10 @@  COVERAGE_DEFINE(netdev_set_ethtool);
 #define TC_RTAB_SIZE 1024
 #endif
 
+#ifndef TCM_IFINDEX_MAGIC_BLOCK
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+#endif
+
 /* Linux 2.6.21 introduced struct tpacket_auxdata.
  * Linux 2.6.27 added the tp_vlan_tci member.
  * Linux 3.0 defined TP_STATUS_VLAN_VALID.
@@ -473,10 +477,10 @@  static int tc_delete_class(const struct netdev *, unsigned int handle);
 static int tc_del_qdisc(struct netdev *netdev);
 static int tc_query_qdisc(const struct netdev *netdev);
 
+void
+tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate);
 static int tc_calc_cell_log(unsigned int mtu);
 static void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
-static void tc_put_rtab(struct ofpbuf *, uint16_t type,
-                        const struct tc_ratespec *rate);
 static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes);
 
 struct netdev_linux {
@@ -2324,6 +2328,109 @@  exit:
     return error;
 }
 
+static struct tc_police
+tc_matchall_fill_police(uint32_t kbits_rate, uint32_t kbits_burst)
+{
+    unsigned int bsize = MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 64;
+    unsigned int bps = ((uint64_t) kbits_rate * 1000) / 8;
+    struct tc_police police;
+    struct tc_ratespec rate;
+    int mtu = 65535;
+
+    memset(&rate, 0, sizeof rate);
+    rate.rate = bps;
+    rate.cell_log = tc_calc_cell_log(mtu);
+    rate.mpu = ETH_TOTAL_MIN;
+
+    memset(&police, 0, sizeof police);
+    police.burst = tc_bytes_to_ticks(bps, bsize);
+    police.action = TC_POLICE_SHOT;
+    police.rate = rate;
+    police.mtu = mtu;
+
+    return police;
+}
+
+static void
+nl_msg_put_act_police(struct ofpbuf *request, struct tc_police police)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "police");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_POLICE_TBF, &police, sizeof police);
+    tc_put_rtab(request, TCA_POLICE_RATE, &police.rate);
+    nl_msg_put_u32(request, TCA_POLICE_RESULT, TC_ACT_UNSPEC);
+    nl_msg_end_nested(request, offset);
+}
+
+static int
+tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
+                        uint32_t kbits_burst)
+{
+    uint16_t eth_type = (OVS_FORCE uint16_t) htons(ETH_P_ALL);
+    size_t basic_offset, action_offset, inner_offset;
+    uint16_t prio = TC_RESERVED_PRIORITY_POLICE;
+    int ifindex, index, err = 0;
+    struct tc_police pol_act;
+    uint32_t block_id = 0;
+    struct ofpbuf request;
+    struct ofpbuf *reply;
+    struct tcmsg *tcmsg;
+    uint32_t handle = 1;
+
+    err = get_ifindex(netdev, &ifindex);
+    if (err) {
+        return err;
+    }
+
+    index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
+    tcmsg = tc_make_request(index, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO,
+                            &request);
+    tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+    tcmsg->tcm_info = tc_make_handle(prio, eth_type);
+    tcmsg->tcm_handle = handle;
+
+    pol_act = tc_matchall_fill_police(kbits_rate, kbits_burst);
+    nl_msg_put_string(&request, TCA_KIND, "matchall");
+    basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
+    action_offset = nl_msg_start_nested(&request, TCA_MATCHALL_ACT);
+    inner_offset = nl_msg_start_nested(&request, 1);
+    nl_msg_put_act_police(&request, pol_act);
+    nl_msg_end_nested(&request, inner_offset);
+    nl_msg_end_nested(&request, action_offset);
+    nl_msg_end_nested(&request, basic_offset);
+
+    err = tc_transact(&request, &reply);
+    if (!err) {
+        struct tcmsg *tc =
+            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
+        ofpbuf_delete(reply);
+    }
+
+    return err;
+}
+
+static int
+tc_del_matchall_policer(struct netdev *netdev)
+{
+    uint32_t block_id = 0;
+    int ifindex;
+    int err;
+
+    err = get_ifindex(netdev, &ifindex);
+    if (err) {
+        return err;
+    }
+
+    err = tc_del_filter(ifindex, TC_RESERVED_PRIORITY_POLICE, 1, block_id);
+    if (err) {
+        return err;
+    }
+
+    return 0;
+}
+
 /* Attempts to set input rate limiting (policing) policy.  Returns 0 if
  * successful, otherwise a positive errno value. */
 static int
@@ -2335,14 +2442,6 @@  netdev_linux_set_policing(struct netdev *netdev_,
     int ifindex;
     int error;
 
-    if (netdev_is_flow_api_enabled()) {
-        if (kbits_rate) {
-            VLOG_WARN_RL(&rl, "%s: policing with offload isn't supported",
-                         netdev_name);
-        }
-        return EOPNOTSUPP;
-    }
-
     kbits_burst = (!kbits_rate ? 0       /* Force to 0 if no rate specified. */
                    : !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
                    : kbits_burst);       /* Stick with user-specified value. */
@@ -2368,6 +2467,16 @@  netdev_linux_set_policing(struct netdev *netdev_,
         goto out;
     }
 
+    /* Use matchall for policing when offloading ovs with tc-flower. */
+    if (netdev_is_flow_api_enabled()) {
+        error = tc_del_matchall_policer(netdev_);
+        if (kbits_rate) {
+            error = tc_add_matchall_policer(netdev_, kbits_rate, kbits_burst);
+        }
+        ovs_mutex_unlock(&netdev->mutex);
+        return error;
+    }
+
     COVERAGE_INC(netdev_set_policing);
     /* Remove any existing ingress qdisc. */
     error = tc_add_del_ingress_qdisc(ifindex, false, 0);
@@ -5481,7 +5590,7 @@  tc_fill_rate(struct tc_ratespec *rate, uint64_t Bps, int mtu)
  * attribute of the specified "type".
  *
  * See tc_calc_cell_log() above for a description of "rtab"s. */
-static void
+void
 tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate)
 {
     uint32_t *rtab;
diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 73ce7b952..cef47d1f6 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -278,7 +278,7 @@  get_prio_for_tc_flower(struct tc_flower *flower)
 {
     static struct hmap prios = HMAP_INITIALIZER(&prios);
     static struct ovs_mutex prios_lock = OVS_MUTEX_INITIALIZER;
-    static uint16_t last_prio = 0;
+    static uint16_t last_prio = TC_RESERVED_PRIORITY_MAX;
     size_t key_len = sizeof(struct tc_flower_key);
     size_t hash = hash_int((OVS_FORCE uint32_t) flower->key.eth_type, 0);
     struct prio_map_data *data;
diff --git a/lib/tc.c b/lib/tc.c
index b19f075f2..d31b9d3e4 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1389,6 +1389,10 @@  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower)
     flower->mask.eth_type = OVS_BE16_MAX;
     flower->prio = tc_get_major(tc->tcm_info);
 
+    if (flower->prio == TC_RESERVED_PRIORITY_POLICE) {
+        return 0;
+    }
+
     if (!flower->handle) {
         return EAGAIN;
     }
diff --git a/lib/tc.h b/lib/tc.h
index 7196a32d7..dfb482f02 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -41,6 +41,13 @@ 
 
 #define TC_POLICY_DEFAULT "none"
 
+enum tc_flower_reserved_prio {
+    TC_RESERVED_PRIORITY_NONE,
+    TC_RESERVED_PRIORITY_POLICE,
+    __TC_RESERVED_PRIORITY_MAX
+};
+#define TC_RESERVED_PRIORITY_MAX (__TC_RESERVED_PRIORITY_MAX -1)
+
 /* Returns tc handle 'major':'minor'. */
 static inline unsigned int
 tc_make_handle(unsigned int major, unsigned int minor)