@@ -19,28 +19,29 @@
#include <errno.h>
#include <linux/if_ether.h>
+#include "cmap.h"
+#include "dpif-provider.h"
#include "dpif.h"
#include "hash.h"
#include "id-pool.h"
-#include "openvswitch/hmap.h"
-#include "openvswitch/match.h"
-#include "openvswitch/ofpbuf.h"
-#include "openvswitch/thread.h"
-#include "openvswitch/types.h"
-#include "openvswitch/util.h"
-#include "openvswitch/vlog.h"
#include "netdev-linux.h"
#include "netdev-offload-provider.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
-#include "netlink.h"
#include "netlink-socket.h"
+#include "netlink.h"
#include "odp-netlink.h"
#include "odp-util.h"
+#include "openvswitch/hmap.h"
+#include "openvswitch/match.h"
+#include "openvswitch/ofpbuf.h"
+#include "openvswitch/thread.h"
+#include "openvswitch/types.h"
+#include "openvswitch/util.h"
+#include "openvswitch/vlog.h"
#include "tc.h"
#include "unaligned.h"
#include "util.h"
-#include "dpif-provider.h"
VLOG_DEFINE_THIS_MODULE(netdev_offload_tc);
@@ -104,6 +105,125 @@ static void parse_tc_flower_to_stats(struct tc_flower *flower,
static int get_ufid_adjust_stats(const ovs_u128 *ufid,
struct dpif_flow_stats *stats);
+/* When offloading sample action to TC, userspace creates a unique ID
+ * to map sample action and tunnel info and passes this ID to kernel
+ * instead of the sample info. Kernel will send this ID and sampled
+ * packet to userspace. Using the ID, userspace can recover the sample
+ * info and send sampled packet to the right sample monitoring host. */
+struct offload_sample {
+ uint16_t type; /* enum user_action_cookie_type. */
+ struct nlattr *action; /* Sample action. Used in flow_get. */
+ struct nlattr *userdata; /* Struct user_action_cookie. */
+ struct nlattr *userspace_actions; /* All actions to get output tunnel. */
+ struct flow_tnl *tunnel; /* Input tunnel. */
+ uint16_t ifindex; /* Input ifindex. */
+};
+
+/* This maps a sample group ID to struct offload_sample. */
+struct sgid_node {
+ struct cmap_node id_node;
+ uint32_t id;
+ struct offload_sample sample;
+};
+
+/* The sgid_map mutex protects the sample_group_ids and the sgid_map for
+ * cmap_insert(), cmap_remove(), or cmap_replace() operations. */
+static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER;
+static struct cmap sgid_map = CMAP_INITIALIZER;
+static struct id_pool *sample_group_ids OVS_GUARDED_BY(sgid_lock);
+
+static void
+sgid_node_free(struct sgid_node *node)
+{
+ if (node) {
+ if (node->id) {
+ ovs_mutex_lock(&sgid_lock);
+ id_pool_free_id(sample_group_ids, node->id);
+ ovs_mutex_unlock(&sgid_lock);
+ }
+ free(node->sample.tunnel);
+ free(node->sample.action);
+ free(node->sample.userspace_actions);
+ free(node->sample.userdata);
+ free(node);
+ }
+}
+
+static struct sgid_node *
+sgid_find(uint32_t id)
+{
+ const struct cmap_node *node = cmap_find(&sgid_map, id);
+
+ return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL;
+}
+
+static void
+offload_sample_clone(struct offload_sample *dst,
+ const struct offload_sample *src,
+ bool steal_userspace_actions)
+{
+ dst->type = src->type;
+ dst->action = nullable_xmemdup(src->action, src->action ?
+ src->action->nla_len : 0);
+ if (steal_userspace_actions) {
+ dst->userspace_actions = src->userspace_actions;
+ } else {
+ dst->userspace_actions = src->userspace_actions
+ ? xmemdup(src->userspace_actions,
+ src->userspace_actions->nla_len)
+ : NULL;
+ }
+ dst->userdata = nullable_xmemdup(src->userdata, src->userdata ?
+ src->userdata->nla_len :
+ 0);
+ dst->tunnel = nullable_xmemdup(src->tunnel, sizeof *src->tunnel);
+ dst->ifindex = src->ifindex;
+}
+
+/* Allocate a unique group id for the given set of flow metadata. The id
+ * space is 2**32 - 1. 0 is reserved. */
+static uint32_t
+sgid_alloc(const struct offload_sample *sample)
+{
+ struct sgid_node *node = xzalloc(sizeof *node);
+
+ offload_sample_clone(&node->sample, sample, true);
+
+ ovs_mutex_lock(&sgid_lock);
+
+ if (id_pool_alloc_id(sample_group_ids, &node->id)) {
+ cmap_insert(&sgid_map, &node->id_node, node->id);
+ } else {
+ VLOG_ERR("Can't find a free sample group ID");
+ sgid_node_free(node);
+ node = NULL;
+ }
+
+ ovs_mutex_unlock(&sgid_lock);
+
+ return node ? node->id : 0;
+}
+
+static void
+sgid_free(uint32_t id)
+{
+ struct sgid_node *node;
+
+ if (!id) {
+ return;
+ }
+
+ node = sgid_find(id);
+ if (node) {
+ ovs_mutex_lock(&sgid_lock);
+ cmap_remove(&sgid_map, &node->id_node, node->id);
+ ovs_mutex_unlock(&sgid_lock);
+ ovsrcu_postpone(sgid_node_free, node);
+ } else {
+ VLOG_ERR("Freeing nonexistent sample group ID: %"PRIu32, id);
+ }
+}
+
static bool
is_internal_port(const char *type)
{
@@ -193,12 +313,17 @@ static struct netlink_field set_flower_map[][4] = {
static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
+#define MAX_TC_SAMPLES_PER_FLOW 1
+
/**
* struct ufid_tc_data - data entry for ufid-tc hashmaps.
* @ufid_to_tc_node: Element in @ufid_to_tc hash table by ufid key.
* @tc_to_ufid_node: Element in @tc_to_ufid hash table by tcf_id key.
* @ufid: ufid assigned to the flow
* @id: tc filter id (tcf_id)
+ * @sample_group_id: Mapping id for sample actions. Currently only supports
+ a single sFlow action. Use an array to support multiple
+ similar actions or additional sample types in the future.
* @netdev: netdev associated with the tc rule
* @adjust_stats: When flow gets updated with new actions, we need to adjust
* the reported stats to include previous values as the hardware
@@ -209,6 +334,7 @@ struct ufid_tc_data {
struct hmap_node tc_to_ufid_node;
ovs_u128 ufid;
struct tcf_id id;
+ uint32_t sample_group_id[MAX_TC_SAMPLES_PER_FLOW];
struct netdev *netdev;
struct dpif_flow_stats adjust_stats;
};
@@ -232,6 +358,7 @@ del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
netdev_close(data->netdev);
+ sgid_free(data->sample_group_id[0]);
free(data);
}
@@ -2195,6 +2322,14 @@ netdev_tc_parse_nl_actions(struct netdev *netdev, struct tc_flower *flower,
if (err) {
return err;
}
+ } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
+ struct offload_sample sample;
+ uint32_t sgid;
+
+ memset(&sample, 0, sizeof sample);
+ sgid = sgid_alloc(&sample);
+ sgid_free(sgid);
+ return EOPNOTSUPP;
} else {
VLOG_DBG_RL(&rl, "unsupported put action type: %d",
nl_attr_type(nla));
@@ -2996,6 +3131,9 @@ netdev_tc_init_flow_api(struct netdev *netdev)
tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE,
METER_POLICE_IDS_MAX);
ovs_mutex_unlock(&meter_police_ids_mutex);
+ ovs_mutex_lock(&sgid_lock);
+ sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
+ ovs_mutex_unlock(&sgid_lock);
ovsthread_once_done(&once);
}
@@ -190,6 +190,12 @@ xmemdup(const void *p_, size_t size)
return p;
}
+void *
+nullable_xmemdup(const void *p_, size_t size)
+{
+ return p_ == NULL || size == 0 ? NULL : xmemdup(p_, size);
+}
+
char *
xmemdup0(const char *p_, size_t length)
{
@@ -167,6 +167,7 @@ OVS_RETURNS_NONNULL void *xcalloc(size_t, size_t) MALLOC_LIKE;
OVS_RETURNS_NONNULL void *xzalloc(size_t) MALLOC_LIKE;
OVS_RETURNS_NONNULL void *xrealloc(void *, size_t);
OVS_RETURNS_NONNULL void *xmemdup(const void *, size_t) MALLOC_LIKE;
+OVS_RETURNS_NONNULL void *nullable_xmemdup(const void *, size_t) MALLOC_LIKE;
OVS_RETURNS_NONNULL char *xmemdup0(const char *, size_t) MALLOC_LIKE;
OVS_RETURNS_NONNULL char *xstrdup(const char *) MALLOC_LIKE;
char *nullable_xstrdup(const char *) MALLOC_LIKE;