| Message ID | 20210915124319.1683219-5-cmi@nvidia.com |
|---|---|
| State | Superseded |
| Series | Add offload support for sFlow |
| Context | Check | Description |
|---|---|---|
| ovsrobot/apply-robot | success | apply and check: success |
| ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
One nit, rest looks good. Thanks for removing the expired list handling, as it looks way cleaner now! //Eelco On 15 Sep 2021, at 14:43, Chris Mi wrote: > When offloading sample action to TC, userspace creates a unique ID > to map sFlow action and tunnel info and passes this ID to kernel instead > of the sFlow info. psample will send this ID and sampled packet to > userspace. Using the ID, userspace can recover the sFlow info and send > sampled packet to the right sFlow monitoring host. > > Signed-off-by: Chris Mi <cmi@nvidia.com> > Reviewed-by: Eli Britstein <elibr@nvidia.com> > --- > lib/dpif-offload-provider.h | 17 +++ > lib/netdev-offload-tc.c | 247 +++++++++++++++++++++++++++++++++++- > lib/netdev-offload.h | 1 + > lib/tc.h | 1 + > 4 files changed, 260 insertions(+), 6 deletions(-) > > diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h > index 75238c4cb..95c29c0d8 100644 > --- a/lib/dpif-offload-provider.h > +++ b/lib/dpif-offload-provider.h > @@ -17,9 +17,26 @@ > #ifndef DPIF_OFFLOAD_PROVIDER_H > #define DPIF_OFFLOAD_PROVIDER_H > > +#include "netlink-protocol.h" > +#include "openvswitch/packets.h" > +#include "openvswitch/types.h" > + > struct dpif; > struct dpif_offload_sflow; > > +/* When offloading sample action, userspace creates a unique ID to map > + * sFlow action and tunnel info and passes this ID to datapath instead > + * of the sFlow info. Datapath will send this ID and sampled packet to > + * userspace. Using the ID, userspace can recover the sFlow info and send > + * sampled packet to the right sFlow monitoring host. > + */ > +struct dpif_sflow_attr { > + const struct nlattr *action; /* SFlow action. */ > + const struct nlattr *userdata; /* Struct user_action_cookie. */ > + struct flow_tnl *tunnel; /* Tunnel info. */ > + ovs_u128 ufid; /* Flow ufid. */ > +}; > + > /* Datapath interface offload structure, to be defined by each implementation > * of a datapath interface. > */ > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c > index 9845e8d3f..5d817b6cf 100644 > --- a/lib/netdev-offload-tc.c > +++ b/lib/netdev-offload-tc.c > @@ -40,6 +40,7 @@ > #include "unaligned.h" > #include "util.h" > #include "dpif-provider.h" > +#include "cmap.h" > > VLOG_DEFINE_THIS_MODULE(netdev_offload_tc); > > @@ -62,6 +63,234 @@ struct chain_node { > uint32_t chain; > }; > > +/* This maps a psample group ID to struct dpif_sflow_attr for sFlow */ > +struct sgid_node { > + struct ovs_list exp_node OVS_GUARDED; > + struct cmap_node metadata_node; > + struct cmap_node id_node; > + struct ovs_refcount refcount; > + uint32_t hash; > + uint32_t id; > + const struct dpif_sflow_attr sflow; > +}; > + > +static struct ovs_rwlock sgid_rwlock = OVS_RWLOCK_INITIALIZER; > +static struct cmap sgid_map = CMAP_INITIALIZER; > +static struct cmap sgid_metadata_map = CMAP_INITIALIZER; > +static struct ovs_list sgid_expiring OVS_GUARDED_BY(sgid_rwlock) > + = OVS_LIST_INITIALIZER(&sgid_expiring); > +static uint32_t next_sample_group_id OVS_GUARDED_BY(sgid_rwlock) = 1; > + > +static void > +sgid_node_free(struct sgid_node *node) > +{ > + free(node->sflow.tunnel); > + free(CONST_CAST(void *, node->sflow.action)); > + free(CONST_CAST(void *, node->sflow.userdata)); > + free(node); > +} > + > +/* Lockless RCU protected lookup. If node is needed accross RCU quiescent > + * state, caller should copy the contents. */ > +static const struct sgid_node * > +sgid_find(uint32_t id) > +{ > + const struct cmap_node *node = cmap_find(&sgid_map, id); > + > + return node ? 
CONTAINER_OF(node, const struct sgid_node, id_node) : NULL; > +} > + > +const struct dpif_sflow_attr * > +dpif_offload_sflow_attr_find(uint32_t id) > +{ > + const struct sgid_node *node; > + > + node = sgid_find(id); > + if (!node) { > + return NULL; > + } > + > + return &node->sflow; > +} > + > +static uint32_t > +dpif_sflow_attr_hash(const struct dpif_sflow_attr *sflow) > +{ > + return hash_bytes(&sflow->ufid, sizeof sflow->ufid, 0); > +} > + > +static bool > +dpif_sflow_attr_equal(const struct dpif_sflow_attr *a, > + const struct dpif_sflow_attr *b) > +{ > + if (!ovs_u128_equals(a->ufid, b->ufid)) { > + return false; > + } > + if (!a->action || !b->action) { > + return false; > + } > + if (a->action->nla_len != b->action->nla_len > + || memcmp(a->action, b->action, a->action->nla_len)) { > + return false; > + } > + if (!a->tunnel && !b->tunnel) { > + return true; > + } > + if (!a->tunnel || !b->tunnel) { > + return false; > + } > + > + return !memcmp(a->tunnel, b->tunnel, sizeof *a->tunnel); > +} > + > +/* Lockless RCU protected lookup. If node is needed accross RCU quiescent > + * state, caller should take a reference. */ > +static struct sgid_node * > +sgid_find_equal(const struct dpif_sflow_attr *target, uint32_t hash) > +{ > + struct sgid_node *node; > + > + CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &sgid_metadata_map) { > + if (dpif_sflow_attr_equal(&node->sflow, target)) { > + return node; > + } > + } > + return NULL; > +} > + > +static struct sgid_node * > +sgid_ref_equal(const struct dpif_sflow_attr *target, uint32_t hash) > +{ > + struct sgid_node *node; > + > + do { > + node = sgid_find_equal(target, hash); > + /* Try again if the node was released before we get the reference. */ > + } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); > + > + return node; > +} > + > +static void > +dpif_sflow_attr_clone(struct dpif_sflow_attr *new, > + const struct dpif_sflow_attr *old) > +{ > + new->action = xmemdup(old->action, old->action->nla_len); > + new->userdata = xmemdup(old->userdata, old->userdata->nla_len); > + new->tunnel = old->tunnel > + ? xmemdup(old->tunnel, sizeof *old->tunnel) > + : NULL; > + new->ufid = old->ufid; > +} > + > +/* We use the id as the hash value, which works due to cmap internal rehashing. > + * We also only insert nodes with unique IDs, so all possible hash collisions > + * remain internal to the cmap. */ > +static struct sgid_node * > +sgid_find__(uint32_t id) > + OVS_REQUIRES(sgid_rwlock) > +{ > + struct cmap_node *node = cmap_find_protected(&sgid_map, id); > + > + return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL; > +} > + > +/* Allocate a unique group id for the given set of flow metadata. The id > + * space is 2^^32, but if looping too many times, we still can't find a > + * free one, that means something is wrong, return NULL. > + */ > +#define SGID_ALLOC_RETRY_MAX 8192 > +static struct sgid_node * > +sgid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash) > +{ > + struct sgid_node *node = xzalloc(sizeof *node); > + int i = 0; > + > + node->hash = hash; > + ovs_refcount_init(&node->refcount); > + dpif_sflow_attr_clone(CONST_CAST(struct dpif_sflow_attr *, > + &node->sflow), sflow); > + > + ovs_rwlock_wrlock(&sgid_rwlock); > + for (;;) { > + node->id = next_sample_group_id++; > + if (OVS_UNLIKELY(!node->id)) { > + next_sample_group_id = 1; > + node->id = next_sample_group_id++; > + } > + /* Find if the id is free. 
*/ > + if (OVS_LIKELY(!sgid_find__(node->id))) { > + break; > + } > + if (++i == SGID_ALLOC_RETRY_MAX) { > + VLOG_ERR("%s: Can't find a free group ID after %d retries", > + __func__, i); > + ovs_rwlock_unlock(&sgid_rwlock); > + sgid_node_free(node); > + return NULL; > + } > + } > + cmap_insert(&sgid_map, &node->id_node, node->id); > + cmap_insert(&sgid_metadata_map, &node->metadata_node, node->hash); > + ovs_rwlock_unlock(&sgid_rwlock); > + return node; > +} > + > +/* Allocate a unique group id for the given set of flow metadata and > + optional actions. */ > +static uint32_t > +sgid_alloc_ctx(const struct dpif_sflow_attr *sflow) > +{ > + uint32_t hash = dpif_sflow_attr_hash(sflow); > + struct sgid_node *node = sgid_ref_equal(sflow, hash); > + > + if (!node) { > + node = sgid_alloc__(sflow, hash); > + } > + > + return node ? node->id : 0; > +} > + > +static void > +sgid_node_unref(const struct sgid_node *node_) > + OVS_EXCLUDED(sgid_rwlock) > +{ > + struct sgid_node *node = CONST_CAST(struct sgid_node *, node_); > + > + ovs_rwlock_wrlock(&sgid_rwlock); > + if (node && ovs_refcount_unref(&node->refcount) == 1) { > + cmap_remove(&sgid_metadata_map, &node->metadata_node, node->hash); > + cmap_remove(&sgid_map, &node->id_node, node->id); > + ovsrcu_postpone(sgid_node_free, node); > + } > + ovs_rwlock_unlock(&sgid_rwlock); > +} > + > +static void > +sgid_free(uint32_t id) > +{ > + const struct sgid_node *node; > + > + if (!id) { > + return; > + } > + > + node = sgid_find(id); > + if (node) { > + sgid_node_unref(node); > + } else { > + VLOG_ERR("%s: Freeing nonexistent group ID: %"PRIu32, __func__, id); > + } > +} > + > +static int > +del_filter_and_sgid(struct tcf_id *id) > +{ > + sgid_free(id->sample_group_id); > + id->sample_group_id = 0; > + return tc_del_filter(id); > +} > + > static bool > is_internal_port(const char *type) > { > @@ -198,13 +427,13 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) > ovs_mutex_unlock(&ufid_lock); > } > > -/* Wrapper function to delete filter and ufid tc mapping */ > +/* Wrapper function to delete filter, sgid and ufid tc mapping */ > static int > -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) > +del_filter_sgid_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) > { > int err; > > - err = tc_del_filter(id); > + err = del_filter_and_sgid(id); > if (!err) { > del_ufid_tc_mapping(ufid); > } > @@ -426,7 +655,7 @@ netdev_tc_flow_flush(struct netdev *netdev) > continue; > } > > - err = tc_del_filter(&data->id); > + err = del_filter_and_sgid(&data->id); > if (!err) { > del_ufid_tc_mapping_unlocked(&data->ufid); > } > @@ -1917,6 +2146,11 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > action->type = TC_ACT_GOTO; > action->chain = 0; /* 0 is reserved and not used by recirc. 
*/ > flower.action_count++; > + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) { > + struct dpif_sflow_attr sflow_attr; > + > + memset(&sflow_attr, 0, sizeof sflow_attr); > + sgid_alloc_ctx(&sflow_attr); > } else { > VLOG_DBG_RL(&rl, "unsupported put action type: %d", > nl_attr_type(nla)); > @@ -1932,7 +2166,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > if (get_ufid_tc_mapping(ufid, &id) == 0) { > VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", > id.handle, id.prio); > - info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid); > + info->tc_modify_flow_deleted = !del_filter_sgid_ufid_mapping(&id, > + ufid); > } > > prio = get_prio_for_tc_flower(&flower); > @@ -2021,7 +2256,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > } > } > > - error = del_filter_and_ufid_mapping(&id, ufid); > + error = del_filter_sgid_ufid_mapping(&id, ufid); > > return error; > } > diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h > index e7fcedae9..d57d70470 100644 > --- a/lib/netdev-offload.h > +++ b/lib/netdev-offload.h > @@ -138,6 +138,7 @@ int netdev_ports_flow_get(const char *dpif_type, struct match *match, > int netdev_ports_get_n_flows(const char *dpif_type, > odp_port_t port_no, uint64_t *n_flows); > > +const struct dpif_sflow_attr *dpif_offload_sflow_attr_find(uint32_t id); Please keep the extra new line here. > #ifdef __cplusplus > } > #endif > diff --git a/lib/tc.h b/lib/tc.h > index a147ca461..2e4084f48 100644 > --- a/lib/tc.h > +++ b/lib/tc.h > @@ -276,6 +276,7 @@ struct tcf_id { > uint32_t chain; > uint16_t prio; > uint32_t handle; > + uint32_t sample_group_id; > }; > > static inline struct tcf_id > -- > 2.27.0
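For readers outside the OVS tree, the group-ID allocation loop in sgid_alloc__() above can be illustrated with a minimal standalone C sketch: IDs start at 1, 0 is skipped on wraparound because it means "no sample group", and allocation gives up after a bounded number of retries. A plain array stands in for the cmap-based sgid_map, and every name suffixed `_demo` is invented for this illustration rather than an OVS API.

```c
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define SGID_ALLOC_RETRY_MAX_DEMO 8192
#define MAX_IDS_DEMO 64                /* Tiny table; enough for the demo. */

static uint32_t allocated_demo[MAX_IDS_DEMO];
static size_t n_allocated_demo;
static uint32_t next_id_demo = 1;

/* True if 'id' is already in use (the patch asks the cmap instead). */
static bool
id_in_use_demo(uint32_t id)
{
    for (size_t i = 0; i < n_allocated_demo; i++) {
        if (allocated_demo[i] == id) {
            return true;
        }
    }
    return false;
}

/* Returns a free non-zero group ID, or 0 if none was found in time,
 * mirroring sgid_alloc__() returning NULL after too many retries. */
static uint32_t
sgid_alloc_demo(void)
{
    if (n_allocated_demo == MAX_IDS_DEMO) {
        return 0;                      /* Demo table is full. */
    }
    for (int retries = 0; retries < SGID_ALLOC_RETRY_MAX_DEMO; retries++) {
        uint32_t id = next_id_demo++;

        if (!id) {                     /* Skip 0 on 32-bit wraparound. */
            next_id_demo = 1;
            id = next_id_demo++;
        }
        if (!id_in_use_demo(id)) {
            allocated_demo[n_allocated_demo++] = id;
            return id;
        }
    }
    return 0;
}

int
main(void)
{
    for (int i = 0; i < 5; i++) {
        printf("allocated group id %"PRIu32"\n", sgid_alloc_demo());
    }
    return 0;
}
```

Returning 0 on failure mirrors the patch, where 0 doubles as "no sample group ID" in struct tcf_id and sgid_free() simply ignores it.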
On 10/1/2021 6:20 PM, Eelco Chaudron wrote: > One nit, rest looks good. Thanks for removing the expired list handling, as it looks way cleaner now! > > //Eelco > > On 15 Sep 2021, at 14:43, Chris Mi wrote: > >> When offloading sample action to TC, userspace creates a unique ID >> to map sFlow action and tunnel info and passes this ID to kernel instead >> of the sFlow info. psample will send this ID and sampled packet to >> userspace. Using the ID, userspace can recover the sFlow info and send >> sampled packet to the right sFlow monitoring host. >> >> Signed-off-by: Chris Mi <cmi@nvidia.com> >> Reviewed-by: Eli Britstein <elibr@nvidia.com> >> --- >> lib/dpif-offload-provider.h | 17 +++ >> lib/netdev-offload-tc.c | 247 +++++++++++++++++++++++++++++++++++- >> lib/netdev-offload.h | 1 + >> lib/tc.h | 1 + >> 4 files changed, 260 insertions(+), 6 deletions(-) >> >> diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h >> index 75238c4cb..95c29c0d8 100644 >> --- a/lib/dpif-offload-provider.h >> +++ b/lib/dpif-offload-provider.h >> @@ -17,9 +17,26 @@ >> #ifndef DPIF_OFFLOAD_PROVIDER_H >> #define DPIF_OFFLOAD_PROVIDER_H >> >> +#include "netlink-protocol.h" >> +#include "openvswitch/packets.h" >> +#include "openvswitch/types.h" >> + >> struct dpif; >> struct dpif_offload_sflow; >> >> +/* When offloading sample action, userspace creates a unique ID to map >> + * sFlow action and tunnel info and passes this ID to datapath instead >> + * of the sFlow info. Datapath will send this ID and sampled packet to >> + * userspace. Using the ID, userspace can recover the sFlow info and send >> + * sampled packet to the right sFlow monitoring host. >> + */ >> +struct dpif_sflow_attr { >> + const struct nlattr *action; /* SFlow action. */ >> + const struct nlattr *userdata; /* Struct user_action_cookie. */ >> + struct flow_tnl *tunnel; /* Tunnel info. */ >> + ovs_u128 ufid; /* Flow ufid. */ >> +}; >> + >> /* Datapath interface offload structure, to be defined by each implementation >> * of a datapath interface. >> */ >> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c >> index 9845e8d3f..5d817b6cf 100644 >> --- a/lib/netdev-offload-tc.c >> +++ b/lib/netdev-offload-tc.c >> @@ -40,6 +40,7 @@ >> #include "unaligned.h" >> #include "util.h" >> #include "dpif-provider.h" >> +#include "cmap.h" >> >> VLOG_DEFINE_THIS_MODULE(netdev_offload_tc); >> >> @@ -62,6 +63,234 @@ struct chain_node { >> uint32_t chain; >> }; >> >> +/* This maps a psample group ID to struct dpif_sflow_attr for sFlow */ >> +struct sgid_node { >> + struct ovs_list exp_node OVS_GUARDED; >> + struct cmap_node metadata_node; >> + struct cmap_node id_node; >> + struct ovs_refcount refcount; >> + uint32_t hash; >> + uint32_t id; >> + const struct dpif_sflow_attr sflow; >> +}; >> + >> +static struct ovs_rwlock sgid_rwlock = OVS_RWLOCK_INITIALIZER; >> +static struct cmap sgid_map = CMAP_INITIALIZER; >> +static struct cmap sgid_metadata_map = CMAP_INITIALIZER; >> +static struct ovs_list sgid_expiring OVS_GUARDED_BY(sgid_rwlock) >> + = OVS_LIST_INITIALIZER(&sgid_expiring); >> +static uint32_t next_sample_group_id OVS_GUARDED_BY(sgid_rwlock) = 1; >> + >> +static void >> +sgid_node_free(struct sgid_node *node) >> +{ >> + free(node->sflow.tunnel); >> + free(CONST_CAST(void *, node->sflow.action)); >> + free(CONST_CAST(void *, node->sflow.userdata)); >> + free(node); >> +} >> + >> +/* Lockless RCU protected lookup. If node is needed accross RCU quiescent >> + * state, caller should copy the contents. 
*/ >> +static const struct sgid_node * >> +sgid_find(uint32_t id) >> +{ >> + const struct cmap_node *node = cmap_find(&sgid_map, id); >> + >> + return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL; >> +} >> + >> +const struct dpif_sflow_attr * >> +dpif_offload_sflow_attr_find(uint32_t id) >> +{ >> + const struct sgid_node *node; >> + >> + node = sgid_find(id); >> + if (!node) { >> + return NULL; >> + } >> + >> + return &node->sflow; >> +} >> + >> +static uint32_t >> +dpif_sflow_attr_hash(const struct dpif_sflow_attr *sflow) >> +{ >> + return hash_bytes(&sflow->ufid, sizeof sflow->ufid, 0); >> +} >> + >> +static bool >> +dpif_sflow_attr_equal(const struct dpif_sflow_attr *a, >> + const struct dpif_sflow_attr *b) >> +{ >> + if (!ovs_u128_equals(a->ufid, b->ufid)) { >> + return false; >> + } >> + if (!a->action || !b->action) { >> + return false; >> + } >> + if (a->action->nla_len != b->action->nla_len >> + || memcmp(a->action, b->action, a->action->nla_len)) { >> + return false; >> + } >> + if (!a->tunnel && !b->tunnel) { >> + return true; >> + } >> + if (!a->tunnel || !b->tunnel) { >> + return false; >> + } >> + >> + return !memcmp(a->tunnel, b->tunnel, sizeof *a->tunnel); >> +} >> + >> +/* Lockless RCU protected lookup. If node is needed accross RCU quiescent >> + * state, caller should take a reference. */ >> +static struct sgid_node * >> +sgid_find_equal(const struct dpif_sflow_attr *target, uint32_t hash) >> +{ >> + struct sgid_node *node; >> + >> + CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &sgid_metadata_map) { >> + if (dpif_sflow_attr_equal(&node->sflow, target)) { >> + return node; >> + } >> + } >> + return NULL; >> +} >> + >> +static struct sgid_node * >> +sgid_ref_equal(const struct dpif_sflow_attr *target, uint32_t hash) >> +{ >> + struct sgid_node *node; >> + >> + do { >> + node = sgid_find_equal(target, hash); >> + /* Try again if the node was released before we get the reference. */ >> + } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); >> + >> + return node; >> +} >> + >> +static void >> +dpif_sflow_attr_clone(struct dpif_sflow_attr *new, >> + const struct dpif_sflow_attr *old) >> +{ >> + new->action = xmemdup(old->action, old->action->nla_len); >> + new->userdata = xmemdup(old->userdata, old->userdata->nla_len); >> + new->tunnel = old->tunnel >> + ? xmemdup(old->tunnel, sizeof *old->tunnel) >> + : NULL; >> + new->ufid = old->ufid; >> +} >> + >> +/* We use the id as the hash value, which works due to cmap internal rehashing. >> + * We also only insert nodes with unique IDs, so all possible hash collisions >> + * remain internal to the cmap. */ >> +static struct sgid_node * >> +sgid_find__(uint32_t id) >> + OVS_REQUIRES(sgid_rwlock) >> +{ >> + struct cmap_node *node = cmap_find_protected(&sgid_map, id); >> + >> + return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL; >> +} >> + >> +/* Allocate a unique group id for the given set of flow metadata. The id >> + * space is 2^^32, but if looping too many times, we still can't find a >> + * free one, that means something is wrong, return NULL. 
>> + */ >> +#define SGID_ALLOC_RETRY_MAX 8192 >> +static struct sgid_node * >> +sgid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash) >> +{ >> + struct sgid_node *node = xzalloc(sizeof *node); >> + int i = 0; >> + >> + node->hash = hash; >> + ovs_refcount_init(&node->refcount); >> + dpif_sflow_attr_clone(CONST_CAST(struct dpif_sflow_attr *, >> + &node->sflow), sflow); >> + >> + ovs_rwlock_wrlock(&sgid_rwlock); >> + for (;;) { >> + node->id = next_sample_group_id++; >> + if (OVS_UNLIKELY(!node->id)) { >> + next_sample_group_id = 1; >> + node->id = next_sample_group_id++; >> + } >> + /* Find if the id is free. */ >> + if (OVS_LIKELY(!sgid_find__(node->id))) { >> + break; >> + } >> + if (++i == SGID_ALLOC_RETRY_MAX) { >> + VLOG_ERR("%s: Can't find a free group ID after %d retries", >> + __func__, i); >> + ovs_rwlock_unlock(&sgid_rwlock); >> + sgid_node_free(node); >> + return NULL; >> + } >> + } >> + cmap_insert(&sgid_map, &node->id_node, node->id); >> + cmap_insert(&sgid_metadata_map, &node->metadata_node, node->hash); >> + ovs_rwlock_unlock(&sgid_rwlock); >> + return node; >> +} >> + >> +/* Allocate a unique group id for the given set of flow metadata and >> + optional actions. */ >> +static uint32_t >> +sgid_alloc_ctx(const struct dpif_sflow_attr *sflow) >> +{ >> + uint32_t hash = dpif_sflow_attr_hash(sflow); >> + struct sgid_node *node = sgid_ref_equal(sflow, hash); >> + >> + if (!node) { >> + node = sgid_alloc__(sflow, hash); >> + } >> + >> + return node ? node->id : 0; >> +} >> + >> +static void >> +sgid_node_unref(const struct sgid_node *node_) >> + OVS_EXCLUDED(sgid_rwlock) >> +{ >> + struct sgid_node *node = CONST_CAST(struct sgid_node *, node_); >> + >> + ovs_rwlock_wrlock(&sgid_rwlock); >> + if (node && ovs_refcount_unref(&node->refcount) == 1) { >> + cmap_remove(&sgid_metadata_map, &node->metadata_node, node->hash); >> + cmap_remove(&sgid_map, &node->id_node, node->id); >> + ovsrcu_postpone(sgid_node_free, node); >> + } >> + ovs_rwlock_unlock(&sgid_rwlock); >> +} >> + >> +static void >> +sgid_free(uint32_t id) >> +{ >> + const struct sgid_node *node; >> + >> + if (!id) { >> + return; >> + } >> + >> + node = sgid_find(id); >> + if (node) { >> + sgid_node_unref(node); >> + } else { >> + VLOG_ERR("%s: Freeing nonexistent group ID: %"PRIu32, __func__, id); >> + } >> +} >> + >> +static int >> +del_filter_and_sgid(struct tcf_id *id) >> +{ >> + sgid_free(id->sample_group_id); >> + id->sample_group_id = 0; >> + return tc_del_filter(id); >> +} >> + >> static bool >> is_internal_port(const char *type) >> { >> @@ -198,13 +427,13 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) >> ovs_mutex_unlock(&ufid_lock); >> } >> >> -/* Wrapper function to delete filter and ufid tc mapping */ >> +/* Wrapper function to delete filter, sgid and ufid tc mapping */ >> static int >> -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) >> +del_filter_sgid_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) >> { >> int err; >> >> - err = tc_del_filter(id); >> + err = del_filter_and_sgid(id); >> if (!err) { >> del_ufid_tc_mapping(ufid); >> } >> @@ -426,7 +655,7 @@ netdev_tc_flow_flush(struct netdev *netdev) >> continue; >> } >> >> - err = tc_del_filter(&data->id); >> + err = del_filter_and_sgid(&data->id); >> if (!err) { >> del_ufid_tc_mapping_unlocked(&data->ufid); >> } >> @@ -1917,6 +2146,11 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, >> action->type = TC_ACT_GOTO; >> action->chain = 0; /* 0 is reserved and not used by recirc. 
*/ >> flower.action_count++; >> + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) { >> + struct dpif_sflow_attr sflow_attr; >> + >> + memset(&sflow_attr, 0, sizeof sflow_attr); >> + sgid_alloc_ctx(&sflow_attr); >> } else { >> VLOG_DBG_RL(&rl, "unsupported put action type: %d", >> nl_attr_type(nla)); >> @@ -1932,7 +2166,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, >> if (get_ufid_tc_mapping(ufid, &id) == 0) { >> VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", >> id.handle, id.prio); >> - info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid); >> + info->tc_modify_flow_deleted = !del_filter_sgid_ufid_mapping(&id, >> + ufid); >> } >> >> prio = get_prio_for_tc_flower(&flower); >> @@ -2021,7 +2256,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, >> } >> } >> >> - error = del_filter_and_ufid_mapping(&id, ufid); >> + error = del_filter_sgid_ufid_mapping(&id, ufid); >> >> return error; >> } >> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h >> index e7fcedae9..d57d70470 100644 >> --- a/lib/netdev-offload.h >> +++ b/lib/netdev-offload.h >> @@ -138,6 +138,7 @@ int netdev_ports_flow_get(const char *dpif_type, struct match *match, >> int netdev_ports_get_n_flows(const char *dpif_type, >> odp_port_t port_no, uint64_t *n_flows); >> >> +const struct dpif_sflow_attr *dpif_offload_sflow_attr_find(uint32_t id); > Please keep the extra new line here. Done. > >> #ifdef __cplusplus >> } >> #endif >> diff --git a/lib/tc.h b/lib/tc.h >> index a147ca461..2e4084f48 100644 >> --- a/lib/tc.h >> +++ b/lib/tc.h >> @@ -276,6 +276,7 @@ struct tcf_id { >> uint32_t chain; >> uint16_t prio; >> uint32_t handle; >> + uint32_t sample_group_id; >> }; >> >> static inline struct tcf_id >> -- >> 2.27.0
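The find-or-allocate behaviour of sgid_alloc_ctx() and the matching release in sgid_free() can also be shown with a simpler, single-threaded sketch: flows that carry identical sample attributes share one group ID, and the mapping disappears only when the last flow using it is deleted. A linked list and a plain integer stand in for OVS's cmap and RCU-aware ovs_refcount; everything suffixed `_demo` is hypothetical.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sflow_attr_demo {
    uint32_t ifindex;              /* Stand-in for the real sFlow metadata. */
    uint32_t sampling_rate;
};

struct sgid_node_demo {
    struct sgid_node_demo *next;
    struct sflow_attr_demo attr;
    uint32_t id;
    int refcount;
};

static struct sgid_node_demo *nodes_demo;
static uint32_t next_id_demo = 1;

static struct sgid_node_demo *
sgid_find_equal_demo(const struct sflow_attr_demo *attr)
{
    for (struct sgid_node_demo *n = nodes_demo; n; n = n->next) {
        if (!memcmp(&n->attr, attr, sizeof *attr)) {
            return n;
        }
    }
    return NULL;
}

/* Returns the group ID for 'attr', creating a node on first use.
 * (Error handling omitted for brevity.) */
static uint32_t
sgid_alloc_ctx_demo(const struct sflow_attr_demo *attr)
{
    struct sgid_node_demo *node = sgid_find_equal_demo(attr);

    if (node) {
        node->refcount++;
        return node->id;
    }
    node = calloc(1, sizeof *node);
    node->attr = *attr;
    node->id = next_id_demo++;
    node->refcount = 1;
    node->next = nodes_demo;
    nodes_demo = node;
    return node->id;
}

/* Drops one reference; the node is freed when the last user goes away. */
static void
sgid_free_demo(uint32_t id)
{
    for (struct sgid_node_demo **p = &nodes_demo; *p; p = &(*p)->next) {
        if ((*p)->id == id) {
            if (--(*p)->refcount == 0) {
                struct sgid_node_demo *dead = *p;
                *p = dead->next;
                free(dead);
            }
            return;
        }
    }
}

int
main(void)
{
    struct sflow_attr_demo attr = { .ifindex = 3, .sampling_rate = 64 };
    uint32_t a = sgid_alloc_ctx_demo(&attr);
    uint32_t b = sgid_alloc_ctx_demo(&attr);   /* Same attrs -> same ID. */

    printf("a=%u b=%u\n", (unsigned) a, (unsigned) b);
    sgid_free_demo(b);
    sgid_free_demo(a);                          /* Last ref: node is freed. */
    return 0;
}
```

The real code additionally loops over ovs_refcount_try_ref_rcu() in sgid_ref_equal() to cope with a node being released concurrently, and defers the actual free with ovsrcu_postpone(); a single-threaded sketch needs neither.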
diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h index 75238c4cb..95c29c0d8 100644 --- a/lib/dpif-offload-provider.h +++ b/lib/dpif-offload-provider.h @@ -17,9 +17,26 @@ #ifndef DPIF_OFFLOAD_PROVIDER_H #define DPIF_OFFLOAD_PROVIDER_H +#include "netlink-protocol.h" +#include "openvswitch/packets.h" +#include "openvswitch/types.h" + struct dpif; struct dpif_offload_sflow; +/* When offloading sample action, userspace creates a unique ID to map + * sFlow action and tunnel info and passes this ID to datapath instead + * of the sFlow info. Datapath will send this ID and sampled packet to + * userspace. Using the ID, userspace can recover the sFlow info and send + * sampled packet to the right sFlow monitoring host. + */ +struct dpif_sflow_attr { + const struct nlattr *action; /* SFlow action. */ + const struct nlattr *userdata; /* Struct user_action_cookie. */ + struct flow_tnl *tunnel; /* Tunnel info. */ + ovs_u128 ufid; /* Flow ufid. */ +}; + /* Datapath interface offload structure, to be defined by each implementation * of a datapath interface. */ diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 9845e8d3f..5d817b6cf 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -40,6 +40,7 @@ #include "unaligned.h" #include "util.h" #include "dpif-provider.h" +#include "cmap.h" VLOG_DEFINE_THIS_MODULE(netdev_offload_tc); @@ -62,6 +63,234 @@ struct chain_node { uint32_t chain; }; +/* This maps a psample group ID to struct dpif_sflow_attr for sFlow */ +struct sgid_node { + struct ovs_list exp_node OVS_GUARDED; + struct cmap_node metadata_node; + struct cmap_node id_node; + struct ovs_refcount refcount; + uint32_t hash; + uint32_t id; + const struct dpif_sflow_attr sflow; +}; + +static struct ovs_rwlock sgid_rwlock = OVS_RWLOCK_INITIALIZER; +static struct cmap sgid_map = CMAP_INITIALIZER; +static struct cmap sgid_metadata_map = CMAP_INITIALIZER; +static struct ovs_list sgid_expiring OVS_GUARDED_BY(sgid_rwlock) + = OVS_LIST_INITIALIZER(&sgid_expiring); +static uint32_t next_sample_group_id OVS_GUARDED_BY(sgid_rwlock) = 1; + +static void +sgid_node_free(struct sgid_node *node) +{ + free(node->sflow.tunnel); + free(CONST_CAST(void *, node->sflow.action)); + free(CONST_CAST(void *, node->sflow.userdata)); + free(node); +} + +/* Lockless RCU protected lookup. If node is needed accross RCU quiescent + * state, caller should copy the contents. */ +static const struct sgid_node * +sgid_find(uint32_t id) +{ + const struct cmap_node *node = cmap_find(&sgid_map, id); + + return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL; +} + +const struct dpif_sflow_attr * +dpif_offload_sflow_attr_find(uint32_t id) +{ + const struct sgid_node *node; + + node = sgid_find(id); + if (!node) { + return NULL; + } + + return &node->sflow; +} + +static uint32_t +dpif_sflow_attr_hash(const struct dpif_sflow_attr *sflow) +{ + return hash_bytes(&sflow->ufid, sizeof sflow->ufid, 0); +} + +static bool +dpif_sflow_attr_equal(const struct dpif_sflow_attr *a, + const struct dpif_sflow_attr *b) +{ + if (!ovs_u128_equals(a->ufid, b->ufid)) { + return false; + } + if (!a->action || !b->action) { + return false; + } + if (a->action->nla_len != b->action->nla_len + || memcmp(a->action, b->action, a->action->nla_len)) { + return false; + } + if (!a->tunnel && !b->tunnel) { + return true; + } + if (!a->tunnel || !b->tunnel) { + return false; + } + + return !memcmp(a->tunnel, b->tunnel, sizeof *a->tunnel); +} + +/* Lockless RCU protected lookup. 
If node is needed accross RCU quiescent + * state, caller should take a reference. */ +static struct sgid_node * +sgid_find_equal(const struct dpif_sflow_attr *target, uint32_t hash) +{ + struct sgid_node *node; + + CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &sgid_metadata_map) { + if (dpif_sflow_attr_equal(&node->sflow, target)) { + return node; + } + } + return NULL; +} + +static struct sgid_node * +sgid_ref_equal(const struct dpif_sflow_attr *target, uint32_t hash) +{ + struct sgid_node *node; + + do { + node = sgid_find_equal(target, hash); + /* Try again if the node was released before we get the reference. */ + } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); + + return node; +} + +static void +dpif_sflow_attr_clone(struct dpif_sflow_attr *new, + const struct dpif_sflow_attr *old) +{ + new->action = xmemdup(old->action, old->action->nla_len); + new->userdata = xmemdup(old->userdata, old->userdata->nla_len); + new->tunnel = old->tunnel + ? xmemdup(old->tunnel, sizeof *old->tunnel) + : NULL; + new->ufid = old->ufid; +} + +/* We use the id as the hash value, which works due to cmap internal rehashing. + * We also only insert nodes with unique IDs, so all possible hash collisions + * remain internal to the cmap. */ +static struct sgid_node * +sgid_find__(uint32_t id) + OVS_REQUIRES(sgid_rwlock) +{ + struct cmap_node *node = cmap_find_protected(&sgid_map, id); + + return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL; +} + +/* Allocate a unique group id for the given set of flow metadata. The id + * space is 2^^32, but if looping too many times, we still can't find a + * free one, that means something is wrong, return NULL. + */ +#define SGID_ALLOC_RETRY_MAX 8192 +static struct sgid_node * +sgid_alloc__(const struct dpif_sflow_attr *sflow, uint32_t hash) +{ + struct sgid_node *node = xzalloc(sizeof *node); + int i = 0; + + node->hash = hash; + ovs_refcount_init(&node->refcount); + dpif_sflow_attr_clone(CONST_CAST(struct dpif_sflow_attr *, + &node->sflow), sflow); + + ovs_rwlock_wrlock(&sgid_rwlock); + for (;;) { + node->id = next_sample_group_id++; + if (OVS_UNLIKELY(!node->id)) { + next_sample_group_id = 1; + node->id = next_sample_group_id++; + } + /* Find if the id is free. */ + if (OVS_LIKELY(!sgid_find__(node->id))) { + break; + } + if (++i == SGID_ALLOC_RETRY_MAX) { + VLOG_ERR("%s: Can't find a free group ID after %d retries", + __func__, i); + ovs_rwlock_unlock(&sgid_rwlock); + sgid_node_free(node); + return NULL; + } + } + cmap_insert(&sgid_map, &node->id_node, node->id); + cmap_insert(&sgid_metadata_map, &node->metadata_node, node->hash); + ovs_rwlock_unlock(&sgid_rwlock); + return node; +} + +/* Allocate a unique group id for the given set of flow metadata and + optional actions. */ +static uint32_t +sgid_alloc_ctx(const struct dpif_sflow_attr *sflow) +{ + uint32_t hash = dpif_sflow_attr_hash(sflow); + struct sgid_node *node = sgid_ref_equal(sflow, hash); + + if (!node) { + node = sgid_alloc__(sflow, hash); + } + + return node ? 
node->id : 0; +} + +static void +sgid_node_unref(const struct sgid_node *node_) + OVS_EXCLUDED(sgid_rwlock) +{ + struct sgid_node *node = CONST_CAST(struct sgid_node *, node_); + + ovs_rwlock_wrlock(&sgid_rwlock); + if (node && ovs_refcount_unref(&node->refcount) == 1) { + cmap_remove(&sgid_metadata_map, &node->metadata_node, node->hash); + cmap_remove(&sgid_map, &node->id_node, node->id); + ovsrcu_postpone(sgid_node_free, node); + } + ovs_rwlock_unlock(&sgid_rwlock); +} + +static void +sgid_free(uint32_t id) +{ + const struct sgid_node *node; + + if (!id) { + return; + } + + node = sgid_find(id); + if (node) { + sgid_node_unref(node); + } else { + VLOG_ERR("%s: Freeing nonexistent group ID: %"PRIu32, __func__, id); + } +} + +static int +del_filter_and_sgid(struct tcf_id *id) +{ + sgid_free(id->sample_group_id); + id->sample_group_id = 0; + return tc_del_filter(id); +} + static bool is_internal_port(const char *type) { @@ -198,13 +427,13 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) ovs_mutex_unlock(&ufid_lock); } -/* Wrapper function to delete filter and ufid tc mapping */ +/* Wrapper function to delete filter, sgid and ufid tc mapping */ static int -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) +del_filter_sgid_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) { int err; - err = tc_del_filter(id); + err = del_filter_and_sgid(id); if (!err) { del_ufid_tc_mapping(ufid); } @@ -426,7 +655,7 @@ netdev_tc_flow_flush(struct netdev *netdev) continue; } - err = tc_del_filter(&data->id); + err = del_filter_and_sgid(&data->id); if (!err) { del_ufid_tc_mapping_unlocked(&data->ufid); } @@ -1917,6 +2146,11 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, action->type = TC_ACT_GOTO; action->chain = 0; /* 0 is reserved and not used by recirc. */ flower.action_count++; + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) { + struct dpif_sflow_attr sflow_attr; + + memset(&sflow_attr, 0, sizeof sflow_attr); + sgid_alloc_ctx(&sflow_attr); } else { VLOG_DBG_RL(&rl, "unsupported put action type: %d", nl_attr_type(nla)); @@ -1932,7 +2166,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, if (get_ufid_tc_mapping(ufid, &id) == 0) { VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", id.handle, id.prio); - info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid); + info->tc_modify_flow_deleted = !del_filter_sgid_ufid_mapping(&id, + ufid); } prio = get_prio_for_tc_flower(&flower); @@ -2021,7 +2256,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, } } - error = del_filter_and_ufid_mapping(&id, ufid); + error = del_filter_sgid_ufid_mapping(&id, ufid); return error; } diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index e7fcedae9..d57d70470 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -138,6 +138,7 @@ int netdev_ports_flow_get(const char *dpif_type, struct match *match, int netdev_ports_get_n_flows(const char *dpif_type, odp_port_t port_no, uint64_t *n_flows); +const struct dpif_sflow_attr *dpif_offload_sflow_attr_find(uint32_t id); #ifdef __cplusplus } #endif diff --git a/lib/tc.h b/lib/tc.h index a147ca461..2e4084f48 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -276,6 +276,7 @@ struct tcf_id { uint32_t chain; uint16_t prio; uint32_t handle; + uint32_t sample_group_id; }; static inline struct tcf_id
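As a footnote on dpif_sflow_attr_equal() in the patch: the tunnel pointer is optional, so after the ufid and the netlink-encoded sample action match, the comparison treats "both tunnels NULL" as equal, "exactly one NULL" as unequal, and otherwise compares the tunnel structs bytewise. A minimal sketch of that rule, with a made-up struct standing in for struct flow_tnl and `_demo` names that are illustrative only:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tnl_demo {
    uint32_t ip_dst;
    uint32_t tun_id;
};

static bool
tunnel_equal_demo(const struct tnl_demo *a, const struct tnl_demo *b)
{
    if (!a && !b) {
        return true;           /* Neither flow carries tunnel metadata. */
    }
    if (!a || !b) {
        return false;          /* Only one of them does. */
    }
    return !memcmp(a, b, sizeof *a);
}

int
main(void)
{
    struct tnl_demo vxlan = { .ip_dst = 0x0a000001, .tun_id = 42 };
    struct tnl_demo other = { .ip_dst = 0x0a000002, .tun_id = 42 };

    printf("%d %d %d\n",
           tunnel_equal_demo(NULL, NULL),      /* 1 */
           tunnel_equal_demo(&vxlan, NULL),    /* 0 */
           tunnel_equal_demo(&vxlan, &other)); /* 0 */
    return 0;
}
```

Because all three checks (ufid, sample action, tunnel) must pass, two TC flows only end up sharing a psample group ID when their sFlow attributes are genuinely identical.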