@@ -2505,10 +2505,49 @@ dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
ovs_mutex_unlock(&dp_flow_offload.mutex);
}
+/* Deletes the rte_flow of an egress partially-offloaded flow from its
+ * egress netdev and clears the flow's egress-offload state.
+ * Returns 0 on success, non-zero on failure. */
+static int
+partial_offload_egress_flow_del(struct dp_flow_offload_item *offload)
+{
+    struct dp_netdev_pmd_thread *pmd = offload->pmd;
+    struct dp_netdev_flow *flow = offload->flow;
+    const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
+    struct netdev *port;
+    int ret;
+
+    port = netdev_ports_get(flow->egress_offload_port, dpif_type_str);
+    if (!port) {
+        return -1;
+    }
+
+    /* Taking a global 'port_mutex' to fulfill thread safety
+     * restrictions for the netdev-offload-dpdk module. */
+    ovs_mutex_lock(&pmd->dp->port_mutex);
+    ret = netdev_flow_del(port, &flow->mega_ufid, NULL);
+    ovs_mutex_unlock(&pmd->dp->port_mutex);
+    netdev_close(port);
+
+    if (ret) {
+        return ret;
+    }
+
+    /* 'egress_offload_port' is an odp_port_t, not a pointer; reset it with
+     * ODPP_NONE so later comparisons against ODPP_NONE behave correctly. */
+    flow->egress_offload_port = ODPP_NONE;
+    flow->partial_actions_offloaded = false;
+
+    /* VLOG_DBG_RL() requires a rate-limiter first argument; this module
+     * has none in scope, so use plain VLOG_DBG(). */
+    VLOG_DBG("%s: flow: %p mega_ufid: "UUID_FMT" pmd_id: %d\n", __func__,
+             flow, UUID_ARGS((struct uuid *)&flow->mega_ufid),
+             offload->flow->pmd_id);
+    return ret;
+}
+
 static int
 dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
 {
-    return mark_to_flow_disassociate(offload->pmd, offload->flow);
+    /* Egress partially-offloaded flows carry no flow mark, so delete the
+     * rte_flow on the egress netdev instead of removing a mark
+     * association. */
+    if (unlikely(offload->flow->partial_actions_offloaded &&
+        offload->flow->egress_offload_port != ODPP_NONE)) {
+        return partial_offload_egress_flow_del(offload);
+    } else {
+        return mark_to_flow_disassociate(offload->pmd, offload->flow);
+    }
 }
static int
@@ -2568,51 +2607,82 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
     const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
     bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
     struct offload_info info;
-    struct netdev *port;
-    uint32_t mark;
+    struct netdev *netdev;
+    odp_port_t egress_port = ODPP_NONE;
+    struct netdev *egress_netdev = NULL;
+    bool alloc_mark = true;
+    uint32_t mark = INVALID_FLOW_MARK;
     int ret;
 
     if (flow->dead) {
         return -1;
     }
 
-    port = netdev_ports_get(in_port, dpif_type_str);
-    if (!port) {
+    netdev = netdev_ports_get(in_port, dpif_type_str);
+    if (!netdev) {
         return -1;
     }
 
-    if (dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
+    info.attr_egress = 0;
+    info.partial_actions = 0;
+
+    /* Check whether this flow qualifies for egress partial-action offload;
+     * if so, offload on the egress netdev instead of the in-port. */
+    if (unlikely(netdev_partial_offload_egress(netdev, dpif_type_str,
+                                               &offload->match,
+                                               CONST_CAST(struct nlattr *,
+                                                          offload->actions),
+                                               offload->actions_len,
+                                               &egress_netdev,
+                                               &egress_port))) {
+        if (egress_netdev) {
+            netdev_close(netdev);
+            netdev = egress_netdev;
+            flow->egress_offload_port = egress_port;
+            info.attr_egress = 1;
+            /* No flow mark for egress partial offload; packets continue to
+             * be classified by the datapath. */
+            alloc_mark = false;
+        }
+        info.partial_actions = 1;
+    }
+
+    if (alloc_mark && dp_netdev_alloc_flow_mark(flow, modification, &mark)) {
         /* flow already offloaded */
-        netdev_close(port);
-        return 0;
+        netdev_close(netdev);
+        return 0;
     }
+
     info.flow_mark = mark;
 
     /* Taking a global 'port_mutex' to fulfill thread safety restrictions for
      * the netdev-offload-dpdk module. */
     ovs_mutex_lock(&pmd->dp->port_mutex);
-    ret = netdev_flow_put(port, &offload->match,
+    ret = netdev_flow_put(netdev, &offload->match,
                           CONST_CAST(struct nlattr *, offload->actions),
                           offload->actions_len, &flow->mega_ufid, &info,
                           NULL);
     ovs_mutex_unlock(&pmd->dp->port_mutex);
-    netdev_close(port);
+    netdev_close(netdev);
 
     if (ret) {
         goto err_free;
     }
 
-    if (!modification) {
+    if (unlikely(info.partial_actions && egress_netdev)) {
+        /* VLOG_DBG_RL() requires a rate-limiter first argument; use
+         * VLOG_DBG() here. */
+        VLOG_DBG("%s: flow: %p mega_ufid: "UUID_FMT" pmd_id: %d\n",
+                 __func__, flow, UUID_ARGS((struct uuid *)&flow->mega_ufid),
+                 flow->pmd_id);
+        flow->partial_actions_offloaded = true;
+    } else if (!modification) {
         megaflow_to_mark_associate(&flow->mega_ufid, mark);
         mark_to_flow_associate(mark, flow);
     }
     return 0;
 
 err_free:
+    /* Undo the egress port recorded for a first-time egress offload
+     * attempt that failed, so the flow is not later treated as
+     * partially offloaded. */
+    if (egress_netdev && !flow->partial_actions_offloaded) {
+        flow->egress_offload_port = ODPP_NONE;
+    }
+    if (mark != INVALID_FLOW_MARK) {
-    if (!modification) {
-        flow_mark_free(mark);
-    } else {
-        mark_to_flow_disassociate(pmd, flow);
+        if (!modification) {
+            flow_mark_free(mark);
+        } else {
+            mark_to_flow_disassociate(pmd, flow);
+        }
     }
     return -1;
 }
@@ -2728,7 +2798,8 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
     ovs_assert(cls != NULL);
     dpcls_remove(cls, &flow->cr);
     cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
-    if (flow->mark != INVALID_FLOW_MARK) {
+    /* Queue hardware-offload deletion both for marked flows and for egress
+     * partially-offloaded flows (which carry no flow mark). */
+    if (flow->mark != INVALID_FLOW_MARK || (flow->partial_actions_offloaded
+        && flow->egress_offload_port != ODPP_NONE)) {
         queue_netdev_flow_del(pmd, flow);
     }
     flow->dead = true;
@@ -3486,6 +3557,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
     flow->dead = false;
     flow->batch = NULL;
     flow->mark = INVALID_FLOW_MARK;
+    /* Egress partial-offload state; set by the offload thread in
+     * dp_netdev_flow_offload_put(). */
+    flow->partial_actions_offloaded = false;
+    flow->egress_offload_port = ODPP_NONE;
     *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
     *CONST_CAST(struct flow *, &flow->flow) = match->flow;
     *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -28,6 +28,7 @@
 #include "openvswitch/vlog.h"
 #include "packets.h"
 #include "uuid.h"
+#include "odp-util.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
 
@@ -57,6 +58,7 @@ static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER;
 struct ufid_to_rte_flow_data {
     struct cmap_node node;
     ovs_u128 ufid;
+    /* Number of datapath flows referencing this rte_flow.  Used by egress
+     * partial offload, where several PMD threads may offload the same
+     * mega-flow.  NOTE(review): updated without atomics -- presumably only
+     * accessed from the single offload thread; confirm. */
+    uint32_t refcnt;
     struct rte_flow *rte_flow;
     bool actions_offloaded;
     struct dpif_flow_stats stats;
 
@@ -97,6 +99,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
         ovs_assert(data_prev->rte_flow == NULL);
     }
 
+    /* A newly associated rte_flow starts with a single reference. */
+    data->refcnt = 1;
     data->ufid = *ufid;
     data->rte_flow = rte_flow;
     data->actions_offloaded = actions_offloaded;
@@ -1287,7 +1290,8 @@ static int
 parse_clone_actions(struct netdev *netdev,
                     struct flow_actions *actions,
                     const struct nlattr *clone_actions,
-                    const size_t clone_actions_len)
+                    const size_t clone_actions_len,
+                    struct offload_info *info)
 {
     const struct nlattr *ca;
     unsigned int cleft;
 
@@ -1312,8 +1316,11 @@ parse_clone_actions(struct netdev *netdev,
             add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
                             raw_encap);
         } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) {
-            if (add_output_action(netdev, actions, ca)) {
-                return -1;
+            /* Add the output action only for full offload; with partial
+             * actions the datapath still forwards the packet. */
+            if (!info->partial_actions) {
+                if (add_output_action(netdev, actions, ca)) {
+                    return -1;
+                }
             }
         } else {
             VLOG_DBG_RL(&rl,
 
@@ -1329,12 +1336,15 @@ static int
 parse_flow_actions(struct netdev *netdev,
                    struct flow_actions *actions,
                    struct nlattr *nl_actions,
-                   size_t nl_actions_len)
+                   size_t nl_actions_len,
+                   struct offload_info *info)
 {
     struct nlattr *nla;
     size_t left;
 
-    add_count_action(actions);
+    /* COUNT is only useful for fully-offloaded flows; partially-offloaded
+     * flows keep their statistics in the datapath. */
+    if (!info->partial_actions) {
+        add_count_action(actions);
+    }
 
     NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
         if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
             if (add_output_action(netdev, actions, nla)) {
@@ -1366,7 +1376,7 @@ parse_flow_actions,
             size_t clone_actions_len = nl_attr_get_size(nla);
 
             if (parse_clone_actions(netdev, actions, clone_actions,
-                                    clone_actions_len)) {
+                                    clone_actions_len, info)) {
                 return -1;
             }
         } else {
@@ -1388,15 +1398,22 @@ static struct rte_flow *
 netdev_offload_dpdk_actions(struct netdev *netdev,
                             struct flow_patterns *patterns,
                             struct nlattr *nl_actions,
-                            size_t actions_len)
+                            size_t actions_len,
+                            struct offload_info *info)
 {
-    const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
+    struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
     struct flow_actions actions = { .actions = NULL, .cnt = 0 };
     struct rte_flow *flow = NULL;
     struct rte_flow_error error;
     int ret;
 
+    /* Egress-direction flows must not carry the ingress/transfer
+     * attributes. */
+    if (info->attr_egress) {
+        flow_attr.ingress = 0;
+        flow_attr.egress = 1;
+        flow_attr.transfer = 0;
+    }
+
-    ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
+    ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len, info);
     if (ret) {
         goto out;
     }
@@ -1428,8 +1445,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
     }
 
     flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
-                                       actions_len);
-    if (!flow) {
+                                       actions_len, info);
+    if (flow) {
+        if (info->partial_actions && info->attr_egress) {
+            /* actions_offloaded should be set to false with partial actions,
+             * since it is still considered as partial-offload and not
+             * full-offload. */
+            actions_offloaded = false;
+        }
+    } else if (!(info->partial_actions && info->attr_egress)) {
+        /* MARK+RSS fallback only applies on the ingress path; skip it for
+         * egress partial offload. */
         /* If we failed to offload the rule actions fallback to MARK+RSS
          * actions.
          */
@@ -1482,18 +1506,29 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
                              struct dpif_flow_stats *stats)
 {
     struct ufid_to_rte_flow_data *rte_flow_data;
-    int ret;
+    int ret = 0;
 
-    /*
-     * If an old rte_flow exists, it means it's a flow modification.
-     * Here destroy the old rte flow first before adding a new one.
-     */
     rte_flow_data = ufid_to_rte_flow_data_find(ufid);
     if (rte_flow_data && rte_flow_data->rte_flow) {
-        ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
-                                               rte_flow_data->rte_flow);
-        if (ret < 0) {
+        if (unlikely(info->partial_actions && info->attr_egress)) {
+            /* In the case of partial action offload, the same mega-flow
+             * could be offloaded by multiple PMD threads. Avoid creating
+             * multiple rte_flows and just update the refcnt.
+             * (VLOG_DBG_RL() needs the rate limiter as its first argument,
+             * and 'refcnt' is unsigned, hence "%u".) */
+            VLOG_DBG_RL(&rl, "%s: mega_ufid: "UUID_FMT" refcnt: %u\n",
+                        __func__, UUID_ARGS((struct uuid *)ufid),
+                        rte_flow_data->refcnt);
+            rte_flow_data->refcnt++;
             return ret;
+        } else {
+            /*
+             * If an old rte_flow exists, it means it's a flow modification.
+             * Here destroy the old rte flow first before adding a new one.
+             */
+            ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
+                                                   rte_flow_data->rte_flow);
+            if (ret < 0) {
+                return ret;
+            }
         }
     }
@@ -1515,6 +1550,12 @@ netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
         return -1;
     }
 
+    /* VLOG_DBG_RL() needs the rate limiter as its first argument; 'refcnt'
+     * is unsigned, hence "%u". */
+    VLOG_DBG_RL(&rl, "%s: mega_ufid: "UUID_FMT" refcnt: %u\n", __func__,
+                UUID_ARGS((struct uuid *)ufid), rte_flow_data->refcnt);
+    if (rte_flow_data->refcnt-- > 1) {
+        /* Other datapath flows still reference this rte_flow.  Zero the
+         * caller's stats so they are not read uninitialized. */
+        if (stats) {
+            memset(stats, 0, sizeof *stats);
+        }
+        return 0;
+    }
+
     if (stats) {
         memset(stats, 0, sizeof *stats);
     }
@@ -1574,10 +1615,135 @@ out:
return ret;
}
+/* Structure to hold a nl_parsed OVS action.  'action' points into the
+ * original actions buffer (not copied), so it is only valid while that
+ * buffer is. */
+struct action_attr {
+    int type;              /* OVS action type */
+    struct nlattr *action; /* action attribute */
+};
+
+/*
+ * Maximum number of actions to be parsed while selecting a flow for partial
+ * action offload. This number is currently based on the minimum number of
+ * attributes seen with the tunnel encap action (clone, tunnel_push, output).
+ * This number includes output action to a single egress device (uplink) and
+ * supports neither multiple clone() actions nor multiple output actions.
+ * This number could change if and when we support other actions or
+ * combinations of actions for partial offload.
+ */
+#define MAX_ACTION_ATTRS 3 /* Max # action attributes supported */
+
+/*
+ * This function parses the list of OVS "actions" of length "actions_len",
+ * and returns them in an array of action "attrs", of size "max_attrs".
+ * Nested clone actions are flattened into the same array, following their
+ * enclosing clone attribute.  The parsed number of actions is returned in
+ * "num_attrs". If the number of actions exceeds "max_attrs", parsing is
+ * stopped and E2BIG is returned.  Otherwise, returns success (0).
+ */
+static int
+parse_nlattr_actions(struct nlattr *actions, size_t actions_len,
+                     struct action_attr *attrs, int max_attrs, int *num_attrs)
+{
+    const struct nlattr *a;
+    unsigned int left;
+    int num_actions = 0;
+    int n_attrs = 0;
+    int rc = 0;
+    int type;
+
+    *num_attrs = 0;
+
+    NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
+        type = nl_attr_type(a);
+
+        if (num_actions >= max_attrs) {
+            *num_attrs = num_actions;
+            return E2BIG;
+        }
+
+        attrs[num_actions].type = type;
+        /* CONST_CAST: the stored attribute is only read by callers; the
+         * iteration cursor is const-qualified. */
+        attrs[num_actions].action = CONST_CAST(struct nlattr *, a);
+        num_actions++;
+        if (type == OVS_ACTION_ATTR_CLONE) {
+            /* CONST_CAST: nl_attr_get() returns 'const void *'. */
+            rc = parse_nlattr_actions(CONST_CAST(struct nlattr *,
+                                                 nl_attr_get(a)),
+                                      nl_attr_get_size(a),
+                                      &attrs[num_actions],
+                                      (max_attrs - num_actions), &n_attrs);
+            num_actions += n_attrs;
+            if (rc == E2BIG) {
+                *num_attrs = num_actions;
+                return rc;
+            }
+        }
+    }
+
+    *num_attrs = num_actions;
+    return 0;
+}
+
+/* This function determines if the given flow should be partially offloaded
+ * on the egress device, when the in-port is not offload-capable like a
+ * vhost-user port. The function currently supports offloading of only
+ * tunnel encap action.
+ *
+ * On success, returns true and hands the caller a referenced egress netdev
+ * in '*egress_netdev' (the caller must netdev_close() it) along with its
+ * odp port number in '*egress_port'.
+ */
+bool
+netdev_offload_dpdk_egress_partial(struct netdev *netdev, struct match *match,
+                                   struct nlattr *actions, size_t actions_len,
+                                   struct netdev **egress_netdev,
+                                   odp_port_t *egress_port)
+{
+    struct action_attr attrs[MAX_ACTION_ATTRS];
+    odp_port_t out_port = ODPP_NONE;
+    struct netdev *out_netdev;
+    int num_attrs = 0;
+    int rc;
+
+    /* Support egress partial-offload only when in-port is vhost-user. */
+    if (!is_dpdk_vhost_netdev(netdev)) {
+        return false;
+    }
+
+    rc = parse_nlattr_actions(actions, actions_len, attrs, MAX_ACTION_ATTRS,
+                              &num_attrs);
+    if (rc == E2BIG) {
+        /* Action list too big; decline partial offload */
+        return false;
+    }
+
+    /* Number of attrs expected with tunnel encap action */
+    if (num_attrs < MAX_ACTION_ATTRS) {
+        return false;
+    }
+
+    /* Only support clone sub-actions for now, tnl-push specifically. */
+    if (attrs[0].type != OVS_ACTION_ATTR_CLONE ||
+        attrs[1].type != OVS_ACTION_ATTR_TUNNEL_PUSH ||
+        attrs[2].type != OVS_ACTION_ATTR_OUTPUT) {
+        return false;
+    }
+
+    /* Egress partial-offload needs an output action at the end. */
+    out_port = nl_attr_get_odp_port(attrs[2].action);
+    if (out_port == ODPP_NONE) {
+        return false;
+    }
+
+    /* Support egress partial-offload only when out-port is offload capable. */
+    out_netdev = netdev_ports_get(out_port, netdev->dpif_type);
+    if (!out_netdev) {
+        return false;
+    }
+    if (!netdev_dpdk_flow_api_supported(out_netdev)) {
+        /* Drop the reference taken by netdev_ports_get(); the previous
+         * code leaked it on this path. */
+        netdev_close(out_netdev);
+        return false;
+    }
+
+    /* Flow can be egress partial-offloaded; the out_netdev reference is
+     * transferred to the caller. */
+    *egress_netdev = out_netdev;
+    *egress_port = out_port;
+    return true;
+}
+
 const struct netdev_flow_api netdev_offload_dpdk = {
     .type = "dpdk_flow_api",
     .flow_put = netdev_offload_dpdk_flow_put,
     .flow_del = netdev_offload_dpdk_flow_del,
     .init_flow_api = netdev_offload_dpdk_init_flow_api,
     .flow_get = netdev_offload_dpdk_flow_get,
+    /* Selection hook for egress partial-action offload. */
+    .flow_offload_egress_partial = netdev_offload_dpdk_egress_partial,
 };
@@ -86,6 +86,13 @@ struct netdev_flow_api {
     /* Initializies the netdev flow api.
      * Return 0 if successful, otherwise returns a positive errno value. */
     int (*init_flow_api)(struct netdev *);
+
+    /* Determine if the flow (match + actions) should be partially offloaded
+     * on its egress device rather than the given ingress netdev.  If yes,
+     * returns true and fills the last two out-parameters with a referenced
+     * egress netdev (the caller must netdev_close() it) and its odp port
+     * number; otherwise returns false.
+     */
+    bool (*flow_offload_egress_partial)(struct netdev *, struct match *,
+                                        struct nlattr *, size_t,
+                                        struct netdev **, odp_port_t *);
 };
int netdev_register_flow_api_provider(const struct netdev_flow_api *);
@@ -666,3 +666,70 @@ netdev_set_flow_api_enabled(const struct smap *ovs_other_config)
         }
     }
 }
+
+/* Decides whether a flow arriving on 'netdev' (which lacks a flow API of
+ * its own) can be partially offloaded on its egress device, by locating any
+ * flow-API-capable netdev of the same 'dpif_type' and delegating to its
+ * flow_offload_egress_partial() hook.  On success returns true and fills
+ * '*egress_netdev' (referenced; caller must netdev_close() it) and
+ * '*egress_port'. */
+bool
+netdev_partial_offload_egress(struct netdev *netdev, const char *dpif_type,
+                              struct match *match, struct nlattr *actions,
+                              size_t act_len, struct netdev **egress_netdev,
+                              odp_port_t *egress_port)
+{
+    struct netdev *flow_api_netdev = NULL;
+    struct port_to_netdev_data *data;
+    /* Must be const-qualified: ovsrcu_get() yields a pointer to const and
+     * assigning it to a non-const pointer discards the qualifier. */
+    const struct netdev_flow_api *flow_api =
+        ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
+
+    /* Ingress netdev is offload capable; don't need egress offload */
+    if (flow_api) {
+        return false;
+    }
+
+    /* Ingress netdev must belong to the datapath specified.  (Pointer
+     * comparison -- NOTE(review): assumes dpif type strings are interned
+     * constants; confirm for all callers.) */
+    if (netdev_get_dpif_type(netdev) != dpif_type) {
+        return false;
+    }
+
+    /* Walk the list of netdevs of the given dpif_type, looking for any
+     * netdev that supports flow_api (flow_api_netdev). And if that flow_api
+     * supports partial_offload_egress api, invoke it but using the ingress
+     * netdev that doesn't support flow_api. Note that we are using the
+     * flow_api_netdev as just an api conduit and it is not the actual
+     * netdev for which the api (should_offload_egress) is being invoked.
+     */
+    ovs_rwlock_rdlock(&netdev_hmap_rwlock);
+    HMAP_FOR_EACH (data, portno_node, &port_to_netdev) {
+        if (netdev_get_dpif_type(data->netdev) != dpif_type) {
+            continue;
+        }
+        flow_api = ovsrcu_get(const struct netdev_flow_api *,
+                              &data->netdev->flow_api);
+        if (!flow_api) {
+            continue;
+        }
+        /* Hold a reference while we use this netdev as an API conduit. */
+        flow_api_netdev = netdev_ref(data->netdev);
+        break;
+    }
+    ovs_rwlock_unlock(&netdev_hmap_rwlock);
+
+    /* Couldn't find any netdev in the given dp that supports flow_api.
+     * Test 'flow_api_netdev' rather than the loop-scratch 'flow_api';
+     * only the break path leaves both set. */
+    if (!flow_api_netdev) {
+        return false;
+    }
+
+    /* flow_api does not support egress offload */
+    if (!flow_api->flow_offload_egress_partial) {
+        netdev_close(flow_api_netdev);
+        return false;
+    }
+
+    /* Given ingress netdev, can the flow be offloaded to an egress dev ? */
+    if (!flow_api->flow_offload_egress_partial(netdev, match, actions, act_len,
+                                               egress_netdev, egress_port)) {
+        netdev_close(flow_api_netdev);
+        return false;
+    }
+
+    /* Success: flow can be offloaded to the egress netdev. */
+    netdev_close(flow_api_netdev);
+    return true;
+}
@@ -67,6 +67,8 @@ struct offload_info {
bool recirc_id_shared_with_tc; /* Indicates whever tc chains will be in
* sync with datapath recirc ids. */
+ uint8_t attr_egress; /* Egress direction offload */
+ uint8_t partial_actions; /* Partial action offload; no forward action */
/*
* The flow mark id assigened to the flow. If any pkts hit the flow,
@@ -124,7 +126,11 @@ int netdev_ports_flow_get(const char *dpif_type, struct match *match,
struct dpif_flow_stats *stats,
struct dpif_flow_attrs *attrs,
struct ofpbuf *buf);
-
+bool netdev_partial_offload_egress(struct netdev *netdev,
+ const char *dpif_type, struct match *match,
+ struct nlattr *actions, size_t act_len,
+ struct netdev **egress_netdev,
+ odp_port_t *egress_port);
#ifdef __cplusplus
}
#endif