diff mbox

[ovs-dev] tunneling: Combining actions to avoid packet recirculation in tunnel encapsulation.

Message ID 1483112192-7461-1-git-send-email-sugesh.chandran@intel.com
State Changes Requested
Headers show

Commit Message

Chandran, Sugesh Dec. 30, 2016, 3:36 p.m. UTC
Currently the tunnel packets are recirculated in OVS after the tunnel
encapsulation. This patch combines the post tunnel actions with the tunnel
push to avoid unnecessary recirculation.
Two new datapath actions OVS_ATTR_COMBINE_START and OVS_ATTR_COMBINE_END are
added to implement this feature. These dummy new actions  used as a delimiters
for combined action subset.
In our testing this patch improves the VxLAN encapsulation performance upto 30%.

Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
Co-authored-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
---
 datapath/linux/compat/include/linux/openvswitch.h |  2 +
 lib/dpif-netdev.c                                 | 22 ++----
 lib/dpif.c                                        |  2 +
 lib/odp-execute.c                                 | 59 ++++++++++++++-
 lib/odp-util.c                                    | 68 ++++++++++++++++-
 lib/odp-util.h                                    |  6 ++
 ofproto/ofproto-dpif-sflow.c                      |  2 +
 ofproto/ofproto-dpif-xlate.c                      | 89 +++++++++++++++++++++++
 8 files changed, 228 insertions(+), 22 deletions(-)

Comments

Jarno Rajahalme Jan. 3, 2017, 11:59 p.m. UTC | #1
Sugesh,

So the issue is that instead of recirculating on the datapath, we can compute the “recirculation” actions on the tunnel push translation time, as these actions only depend on the tunnel attributes? This explains why the recirculation in this case is unnecessary, and mentioning this in the commit message would make the motivation/correctness easier to understand.

It seems to me that OVS_ACTION_ATTR_START_COMBINE/OVS_ACTION_ATTR_END_COMBINE semantics is to clone the packet and execute the actions in between. A cleaner way of doing this would be with (e.g.) “OVS_ACTION_ATTR_CLONE” that would be a nested attribute, the length of which would span to the end of the nested attributes (i.e., the actions produced by the tunnel output). This would be likely also faster to execute as the end does not need to be searched. Semantically this would be the same as the sample action with probability of 1, but simpler. Obviously the removal of the final cloning could be avoided like now.

Finally, translating the tunnel output looks a lot like translating patch port output. I wonder if it would be possible to share some of the code via some new helpers rather than duplicating it?

  Jarno

> On Dec 30, 2016, at 7:36 AM, Sugesh Chandran <sugesh.chandran@intel.com> wrote:
> 
> Currently the tunnel packets are recirculated in OVS after the tunnel
> encapsulation. This patch combines the post tunnel actions with the tunnel
> push to avoid unnecessary recirculation.
> Two new datapath actions OVS_ATTR_COMBINE_START and OVS_ATTR_COMBINE_END are
> added to implement this feature. These dummy new actions  used as a delimiters
> for combined action subset.
> In our testing this patch improves the VxLAN encapsulation performance upto 30%.
> 
> Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
> Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Co-authored-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> ---
> datapath/linux/compat/include/linux/openvswitch.h |  2 +
> lib/dpif-netdev.c                                 | 22 ++----
> lib/dpif.c                                        |  2 +
> lib/odp-execute.c                                 | 59 ++++++++++++++-
> lib/odp-util.c                                    | 68 ++++++++++++++++-
> lib/odp-util.h                                    |  6 ++
> ofproto/ofproto-dpif-sflow.c                      |  2 +
> ofproto/ofproto-dpif-xlate.c                      | 89 +++++++++++++++++++++++
> 8 files changed, 228 insertions(+), 22 deletions(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
> index 12260d8..4e9c7f7 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -818,6 +818,8 @@ enum ovs_action_attr {
> #ifndef __KERNEL__
> 	OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct ovs_action_push_tnl*/
> 	OVS_ACTION_ATTR_TUNNEL_POP,    /* u32 port number. */
> +	OVS_ACTION_ATTR_START_COMBINE,
> +	OVS_ACTION_ATTR_END_COMBINE,
> #endif
> 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
> 				       * from userspace. */
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 0b73056..2e59bd1 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -4430,24 +4430,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
> 
>     case OVS_ACTION_ATTR_TUNNEL_PUSH:
>         if (*depth < MAX_RECIRC_DEPTH) {
> -            struct dp_packet_batch tnl_pkt;
> -            struct dp_packet_batch *orig_packets_ = packets_;
> -            int err;
> -
> -            if (!may_steal) {
> -                dp_packet_batch_clone(&tnl_pkt, packets_);
> -                packets_ = &tnl_pkt;
> -                dp_packet_batch_reset_cutlen(orig_packets_);
> -            }
> -
>             dp_packet_batch_apply_cutlen(packets_);
> -
> -            err = push_tnl_action(pmd, a, packets_);
> -            if (!err) {
> -                (*depth)++;
> -                dp_netdev_recirculate(pmd, packets_);
> -                (*depth)--;
> -            }
> +            push_tnl_action(pmd, a, packets_);
>             return;
>         }
>         break;
> @@ -4597,6 +4581,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>         break;
>     }
> 
> +    case OVS_ACTION_ATTR_START_COMBINE:
> +        break;
> +    case OVS_ACTION_ATTR_END_COMBINE:
> +        break;
>     case OVS_ACTION_ATTR_PUSH_VLAN:
>     case OVS_ACTION_ATTR_POP_VLAN:
>     case OVS_ACTION_ATTR_PUSH_MPLS:
> diff --git a/lib/dpif.c b/lib/dpif.c
> index 53958c5..6477f33 100644
> --- a/lib/dpif.c
> +++ b/lib/dpif.c
> @@ -1187,6 +1187,8 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
>     case OVS_ACTION_ATTR_SAMPLE:
>     case OVS_ACTION_ATTR_TRUNC:
>     case OVS_ACTION_ATTR_UNSPEC:
> +    case OVS_ACTION_ATTR_START_COMBINE:
> +    case OVS_ACTION_ATTR_END_COMBINE:
>     case __OVS_ACTION_ATTR_MAX:
>         OVS_NOT_REACHED();
>     }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 4fcff16..c475f98 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -532,6 +532,8 @@ requires_datapath_assistance(const struct nlattr *a)
>     case OVS_ACTION_ATTR_USERSPACE:
>     case OVS_ACTION_ATTR_RECIRC:
>     case OVS_ACTION_ATTR_CT:
> +    case OVS_ACTION_ATTR_START_COMBINE:
> +    case OVS_ACTION_ATTR_END_COMBINE:
>         return true;
> 
>     case OVS_ACTION_ATTR_SET:
> @@ -553,6 +555,29 @@ requires_datapath_assistance(const struct nlattr *a)
>     return false;
> }
> 
> +static inline size_t
> +find_combine_action_end(const struct nlattr **actions_, size_t *actions_len,
> +                        const struct nlattr **comb_start,
> +                        bool *comb_last_action)
> +{
> +    const struct nlattr *a;
> +    const struct nlattr *actions = *actions_;
> +    unsigned int left;
> +    bool last_action;
> +    size_t comb_size = nl_attr_get_u32(actions); /* Size of combined actions */
> +    *actions_len -= NLA_ALIGN(actions->nla_len);
> +    actions = nl_attr_next(actions);
> +
> +    *comb_start = actions;
> +    a = (void *) ((uint8_t *) actions + NLA_ALIGN(comb_size));
> +    left = (*actions_len) - comb_size;
> +    last_action = (left <= NLA_ALIGN(a->nla_len));
> +    *actions_ = a;
> +    *comb_last_action = last_action ? true : false;
> +    *actions_len = left;
> +    return comb_size;
> +}
> +
> void
> odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>                     const struct nlattr *actions, size_t actions_len,
> @@ -561,7 +586,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>     struct dp_packet **packets = batch->packets;
>     int cnt = batch->count;
>     const struct nlattr *a;
> -    unsigned int left;
> +    size_t left;
>     int i;
> 
>     NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
> @@ -573,7 +598,35 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>                 /* Allow 'dp_execute_action' to steal the packet data if we do
>                  * not need it any more. */
>                 bool may_steal = steal && last_action;
> -
> +                if ((enum ovs_action_attr) type ==
> +                                           OVS_ACTION_ATTR_START_COMBINE) {
> +                    size_t comb_action_len;
> +                    const struct nlattr *comb_actions = NULL;
> +                    bool comb_last_action = true;
> +                    comb_action_len = find_combine_action_end(&a, &left,
> +                                                            &comb_actions,
> +                                                            &comb_last_action);
> +                    if (OVS_UNLIKELY(!comb_action_len)) {
> +                        /* No combine present, execute current action */
> +                        goto execute;
> +                    }
> +                    if (OVS_LIKELY(comb_last_action)) {
> +                        odp_execute_actions(dp, batch, 1, comb_actions,
> +                                        comb_action_len, dp_execute_action);
> +                        return;
> +                    }
> +                    else  {
> +                        /* Packets to be duplicated for the sub action */
> +                        struct dp_packet_batch comb_pkt_batch;
> +                        dp_packet_batch_clone(&comb_pkt_batch, batch);
> +                        dp_packet_batch_reset_cutlen(batch);
> +                        odp_execute_actions(dp, &comb_pkt_batch, steal,
> +                                        comb_actions, comb_action_len,
> +                                        dp_execute_action);
> +                    }
> +                    continue;
> +                }
> +execute:
>                 dp_execute_action(dp, batch, a, may_steal);
> 
>                 if (last_action) {
> @@ -683,6 +736,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>         case OVS_ACTION_ATTR_RECIRC:
>         case OVS_ACTION_ATTR_CT:
>         case OVS_ACTION_ATTR_UNSPEC:
> +        case OVS_ACTION_ATTR_START_COMBINE:
> +        case OVS_ACTION_ATTR_END_COMBINE:
>         case __OVS_ACTION_ATTR_MAX:
>             OVS_NOT_REACHED();
>         }
> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 427dd65..9cb8629 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -121,6 +121,8 @@ odp_action_len(uint16_t type)
>     case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;
>     case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;
>     case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE;
> +    case OVS_ACTION_ATTR_START_COMBINE: return sizeof(uint32_t);
> +    case OVS_ACTION_ATTR_END_COMBINE: return 0;
> 
>     case OVS_ACTION_ATTR_UNSPEC:
>     case __OVS_ACTION_ATTR_MAX:
> @@ -509,7 +511,7 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
>                       gnh->oam ? "oam," : "",
>                       gnh->critical ? "crit," : "",
>                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
> - 
> +
>         if (gnh->opt_len) {
>             ds_put_cstr(ds, ",options(");
>             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
> @@ -865,6 +867,17 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
>     case OVS_ACTION_ATTR_CT:
>         format_odp_conntrack_action(ds, a);
>         break;
> +    case OVS_ACTION_ATTR_START_COMBINE: {
> +        uint32_t comb_len;
> +        comb_len = nl_attr_get_u32(a);
> +        if (comb_len) {
> +            ds_put_cstr(ds,"{");
> +        }
> +        break;
> +    }
> +    case OVS_ACTION_ATTR_END_COMBINE:
> +        ds_put_cstr(ds,"}");
> +        break;
>     case OVS_ACTION_ATTR_UNSPEC:
>     case __OVS_ACTION_ATTR_MAX:
>     default:
> @@ -873,6 +886,34 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
>     }
> }
> 
> +static bool
> +is_action_combined(const struct nlattr *a, size_t len)
> +{
> +    bool last_action = false;
> +    last_action = (len <= NLA_ALIGN(a->nla_len));
> +    enum ovs_action_attr type = nl_attr_type(a);
> +    if (type ==  OVS_ACTION_ATTR_START_COMBINE) {
> +        uint32_t comb_len;
> +        comb_len = nl_attr_get_u32(a);
> +        if (comb_len) {
> +            return true;
> +        }
> +    }
> +    else if (type == OVS_ACTION_ATTR_END_COMBINE) {
> +        return true;
> +    }
> +    if (!last_action) {
> +        type =  nl_attr_type(nl_attr_next(a));
> +        if (type == OVS_ACTION_ATTR_END_COMBINE) {
> +            return true;
> +        }
> +    }
> +    else {
> +        return true;
> +    }
> +    return false;
> +}
> +
> void
> format_odp_actions(struct ds *ds, const struct nlattr *actions,
>                    size_t actions_len)
> @@ -882,10 +923,10 @@ format_odp_actions(struct ds *ds, const struct nlattr *actions,
>         unsigned int left;
> 
>         NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
> -            if (a != actions) {
> +            format_odp_action(ds, a);
> +            if (!is_action_combined(a, left)) {
>                 ds_put_char(ds, ',');
>             }
> -            format_odp_action(ds, a);
>         }
>         if (left) {
>             int i;
> @@ -5338,6 +5379,27 @@ odp_put_tunnel_action(const struct flow_tnl *tunnel,
> }
> 
> void
> +odp_put_combine_start_action(struct ofpbuf *odp_actions, uint32_t action_len)
> +{
> +    nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_START_COMBINE, action_len);
> +}
> +
> +void
> +odp_update_combine_start_action(struct nlattr *start_combine,
> +                                uint32_t combine_size)
> +{
> +    uint32_t *combine_len;
> +    combine_len = (uint32_t *)(start_combine + 1);
> +    *combine_len = combine_size;
> +}
> +
> +void
> +odp_put_combine_end_action(struct ofpbuf *odp_actions)
> +{
> +    nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_END_COMBINE);
> +}
> +
> +void
> odp_put_tnl_push_action(struct ofpbuf *odp_actions,
>                         struct ovs_action_push_tnl *data)
> {
> diff --git a/lib/odp-util.h b/lib/odp-util.h
> index 42011bc..b77faee 100644
> --- a/lib/odp-util.h
> +++ b/lib/odp-util.h
> @@ -308,4 +308,10 @@ void odp_put_tunnel_action(const struct flow_tnl *tunnel,
> 
> void odp_put_tnl_push_action(struct ofpbuf *odp_actions,
>                              struct ovs_action_push_tnl *data);
> +void odp_put_combine_end_action(struct ofpbuf *odp_actions);
> +void odp_put_combine_start_action(struct ofpbuf *odp_actions,
> +                                  uint32_t action_len);
> +void
> +odp_update_combine_start_action(struct nlattr *start_combine,
> +                                uint32_t combine_size);
> #endif /* odp-util.h */
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index 37992b4..893c1a5 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -1163,6 +1163,8 @@ dpif_sflow_read_actions(const struct flow *flow,
> 	}
> 	case OVS_ACTION_ATTR_SAMPLE:
> 	case OVS_ACTION_ATTR_UNSPEC:
> +    case OVS_ACTION_ATTR_START_COMBINE:
> +    case OVS_ACTION_ATTR_END_COMBINE:
> 	case __OVS_ACTION_ATTR_MAX:
> 	default:
> 	    break;
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 2977be5..f26896f 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -410,6 +410,9 @@ const char *xlate_strerror(enum xlate_error error)
> static void xlate_action_set(struct xlate_ctx *ctx);
> static void xlate_commit_actions(struct xlate_ctx *ctx);
> 
> +static int
> +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev);
> +
> static void
> ctx_trigger_freeze(struct xlate_ctx *ctx)
> {
> @@ -2865,7 +2868,22 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
>     }
>     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
>     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
> +
> +    struct nlattr *start_combine;
> +    uint32_t action_size = 0;
> +    uint32_t push_action_size = 0;
> +    start_combine = ofpbuf_tail(ctx->odp_actions);
> +    odp_put_combine_start_action(ctx->odp_actions, 0);
> +    action_size = ctx->odp_actions->size;
>     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
> +    push_action_size = ctx->odp_actions->size;
> +    combine_tunnel_actions(ctx, out_dev);
> +    if (ctx->odp_actions->size > push_action_size) {
> +        /* Update the combine action only when combined */
> +        odp_update_combine_start_action(start_combine,
> +                                    (ctx->odp_actions->size - action_size));
> +        odp_put_combine_end_action(ctx->odp_actions);
> +    }
>     return 0;
> }
> 
> @@ -2903,6 +2921,77 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co
>             xport_in->xbundle->protected && xport_out->xbundle->protected);
> }
> 
> +static int
> +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev)
> +{
> +    struct flow *flow = &ctx->xin->flow;
> +    struct flow old_flow = ctx->xin->flow;
> +    bool old_conntrack = ctx->conntracked;
> +    bool old_was_mpls = ctx->was_mpls;
> +    ovs_version_t old_version = ctx->xin->tables_version;
> +    struct ofpbuf old_stack = ctx->stack;
> +    union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
> +    struct ofpbuf old_action_set = ctx->action_set;
> +    uint64_t actset_stub[1024 / 8];
> +    const struct xbridge *xbridge_old = ctx->xbridge;
> +
> +    ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
> +    ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
> +
> +    struct xbridge *xbridge = NULL;
> +
> +    xbridge = out_dev->xbridge;
> +
> +    ctx->xbridge = xbridge;   /* peer bridge replaced*/
> +    flow->in_port.ofp_port = out_dev->ofp_port; /* peer port replaced */
> +    flow->metadata = htonll(0);
> +    memset(&flow->tunnel, 0, sizeof flow->tunnel);
> +    memset(flow->regs, 0, sizeof flow->regs);
> +    flow->actset_output = OFPP_UNSET;
> +    ctx->conntracked = false;
> +    clear_conntrack(flow);
> +
> +    /* The bridge is now known so obtain its table version. */
> +    ctx->xin->tables_version
> +        = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
> +
> +    xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
> +    if (!ctx->freezing) {
> +        xlate_action_set(ctx);
> +    }
> +    if (ctx->freezing) {
> +        finish_freezing(ctx);
> +    }
> +
> +    ctx->xin->flow = old_flow;
> +    ctx->xbridge = xbridge_old;
> +    ofpbuf_uninit(&ctx->action_set);
> +    ctx->action_set = old_action_set;
> +    ofpbuf_uninit(&ctx->stack);
> +    ctx->stack = old_stack;
> +
> +    /* Restore calling bridge's lookup version. */
> +    ctx->xin->tables_version = old_version;
> +
> +    /* The peer bridge popping MPLS should have no effect on the original
> +     * bridge. */
> +    ctx->was_mpls = old_was_mpls;
> +
> +    /* The peer bridge's conntrack execution should have no effect on the
> +     * original bridge. */
> +    ctx->conntracked = old_conntrack;
> +
> +    /* The fact that the peer bridge exits (for any reason) does not mean
> +     * that the original bridge should exit.  Specifically, if the peer
> +     * bridge freezes translation, the original bridge must continue
> +     * processing with the original, not the frozen packet! */
> +    ctx->exit = false;
> +
> +    /* Peer bridge errors do not propagate back. */
> +    ctx->error = XLATE_OK;
> +
> +    return 0;
> +}
> 
> static void
> compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
> -- 
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Chandran, Sugesh Jan. 5, 2017, 2:57 p.m. UTC | #2
Hi Jarno,

Thank you for looking into the path, 
Please find my answers inline blow.


Regards
_Sugesh


> -----Original Message-----

> From: Jarno Rajahalme [mailto:jarno@ovn.org]

> Sent: Wednesday, January 4, 2017 12:00 AM

> To: Chandran, Sugesh <sugesh.chandran@intel.com>

> Cc: zoltan.balogh@ericsson.com; dev@openvswitch.org

> Subject: Re: [ovs-dev] [PATCH] tunneling: Combining actions to avoid packet

> recirculation in tunnel encapsulation.

> 

> Sugesh,

> 

> So the issue is that instead of recirculating on the datapath, we can compute

> the “recirculation” actions on the tunnel push translation time, as these

> actions only depend on the tunnel attributes? This explains why the

> recirculation in this case is unnecessary, and mentioning this in the commit

> message would make the motivation/correctness easier to understand.

[Sugesh] Make sense, Will update the commit message accordingly in the v2.
> 

> It seems to me that

> OVS_ACTION_ATTR_START_COMBINE/OVS_ACTION_ATTR_END_COMBINE

> semantics is to clone the packet and execute the actions in between. A

> cleaner way of doing this would be with (e.g.) “OVS_ACTION_ATTR_CLONE”

> that would be a nested attribute, the length of which would span to the end

> of the nested attributes (i.e., the actions produced by the tunnel output).

> This would be likely also faster to execute as the end does not need to be

> searched. Semantically this would be the same as the sample action with

> probability of 1, but simpler. Obviously the removal of the final cloning could

> be avoided like now.

[Sugesh] Sure it make sense to take out the END_COMBINE action.
I am not very sure to call it as CLONE_ACTION as its not guarantee the packet cloning
in every time(For eg: the final action case). The new dummy actions are used to inform dp about set of actions that
are grouped/combined. These actions are two distinct set of actions which are merged to avoid recirculation in dp.
Can we call it OVS_ACTION_ATTR_COMBINE/OVS_ACTION_ATTR_GROUP/OVS_ACTION_ATTR_SET
than calling them CLONE.? 
> 

> Finally, translating the tunnel output looks a lot like translating patch port

> output. I wonder if it would be possible to share some of the code via some

> new helpers rather than duplicating it?

[Sugesh] Though both share same concept, the fields that used for combine are slightly different.
Do you think a single helper function for combine on translation will work for both path and tunnel.
This may need 'ifs' here and there in the function to do some of the field population and validation exclusively either for
tunneling or  patch port. Do you think its cleaner than having independent functions?
Anyway we will look into this possibility of combining these functions.
>   Jarno

> 

> > On Dec 30, 2016, at 7:36 AM, Sugesh Chandran

> <sugesh.chandran@intel.com> wrote:

> >

> > Currently the tunnel packets are recirculated in OVS after the tunnel

> > encapsulation. This patch combines the post tunnel actions with the

> > tunnel push to avoid unnecessary recirculation.

> > Two new datapath actions OVS_ATTR_COMBINE_START and

> > OVS_ATTR_COMBINE_END are added to implement this feature. These

> dummy

> > new actions  used as a delimiters for combined action subset.

> > In our testing this patch improves the VxLAN encapsulation performance

> upto 30%.

> >

> > Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>

> > Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>

> > Co-authored-by: Zoltán Balogh <zoltan.balogh@ericsson.com>

> > ---

> > datapath/linux/compat/include/linux/openvswitch.h |  2 +

> > lib/dpif-netdev.c                                 | 22 ++----

> > lib/dpif.c                                        |  2 +

> > lib/odp-execute.c                                 | 59 ++++++++++++++-

> > lib/odp-util.c                                    | 68 ++++++++++++++++-

> > lib/odp-util.h                                    |  6 ++

> > ofproto/ofproto-dpif-sflow.c                      |  2 +

> > ofproto/ofproto-dpif-xlate.c                      | 89 +++++++++++++++++++++++

> > 8 files changed, 228 insertions(+), 22 deletions(-)

> >

> > diff --git a/datapath/linux/compat/include/linux/openvswitch.h

> > b/datapath/linux/compat/include/linux/openvswitch.h

> > index 12260d8..4e9c7f7 100644

> > --- a/datapath/linux/compat/include/linux/openvswitch.h

> > +++ b/datapath/linux/compat/include/linux/openvswitch.h

> > @@ -818,6 +818,8 @@ enum ovs_action_attr { #ifndef __KERNEL__

> > 	OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct

> ovs_action_push_tnl*/

> > 	OVS_ACTION_ATTR_TUNNEL_POP,    /* u32 port number. */

> > +	OVS_ACTION_ATTR_START_COMBINE,

> > +	OVS_ACTION_ATTR_END_COMBINE,

> > #endif

> > 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be

> accepted

> > 				       * from userspace. */

> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index

> > 0b73056..2e59bd1 100644

> > --- a/lib/dpif-netdev.c

> > +++ b/lib/dpif-netdev.c

> > @@ -4430,24 +4430,8 @@ dp_execute_cb(void *aux_, struct

> > dp_packet_batch *packets_,

> >

> >     case OVS_ACTION_ATTR_TUNNEL_PUSH:

> >         if (*depth < MAX_RECIRC_DEPTH) {

> > -            struct dp_packet_batch tnl_pkt;

> > -            struct dp_packet_batch *orig_packets_ = packets_;

> > -            int err;

> > -

> > -            if (!may_steal) {

> > -                dp_packet_batch_clone(&tnl_pkt, packets_);

> > -                packets_ = &tnl_pkt;

> > -                dp_packet_batch_reset_cutlen(orig_packets_);

> > -            }

> > -

> >             dp_packet_batch_apply_cutlen(packets_);

> > -

> > -            err = push_tnl_action(pmd, a, packets_);

> > -            if (!err) {

> > -                (*depth)++;

> > -                dp_netdev_recirculate(pmd, packets_);

> > -                (*depth)--;

> > -            }

> > +            push_tnl_action(pmd, a, packets_);

> >             return;

> >         }

> >         break;

> > @@ -4597,6 +4581,10 @@ dp_execute_cb(void *aux_, struct

> dp_packet_batch *packets_,

> >         break;

> >     }

> >

> > +    case OVS_ACTION_ATTR_START_COMBINE:

> > +        break;

> > +    case OVS_ACTION_ATTR_END_COMBINE:

> > +        break;

> >     case OVS_ACTION_ATTR_PUSH_VLAN:

> >     case OVS_ACTION_ATTR_POP_VLAN:

> >     case OVS_ACTION_ATTR_PUSH_MPLS:

> > diff --git a/lib/dpif.c b/lib/dpif.c

> > index 53958c5..6477f33 100644

> > --- a/lib/dpif.c

> > +++ b/lib/dpif.c

> > @@ -1187,6 +1187,8 @@ dpif_execute_helper_cb(void *aux_, struct

> dp_packet_batch *packets_,

> >     case OVS_ACTION_ATTR_SAMPLE:

> >     case OVS_ACTION_ATTR_TRUNC:

> >     case OVS_ACTION_ATTR_UNSPEC:

> > +    case OVS_ACTION_ATTR_START_COMBINE:

> > +    case OVS_ACTION_ATTR_END_COMBINE:

> >     case __OVS_ACTION_ATTR_MAX:

> >         OVS_NOT_REACHED();

> >     }

> > diff --git a/lib/odp-execute.c b/lib/odp-execute.c index

> > 4fcff16..c475f98 100644

> > --- a/lib/odp-execute.c

> > +++ b/lib/odp-execute.c

> > @@ -532,6 +532,8 @@ requires_datapath_assistance(const struct nlattr

> *a)

> >     case OVS_ACTION_ATTR_USERSPACE:

> >     case OVS_ACTION_ATTR_RECIRC:

> >     case OVS_ACTION_ATTR_CT:

> > +    case OVS_ACTION_ATTR_START_COMBINE:

> > +    case OVS_ACTION_ATTR_END_COMBINE:

> >         return true;

> >

> >     case OVS_ACTION_ATTR_SET:

> > @@ -553,6 +555,29 @@ requires_datapath_assistance(const struct nlattr

> *a)

> >     return false;

> > }

> >

> > +static inline size_t

> > +find_combine_action_end(const struct nlattr **actions_, size_t

> *actions_len,

> > +                        const struct nlattr **comb_start,

> > +                        bool *comb_last_action) {

> > +    const struct nlattr *a;

> > +    const struct nlattr *actions = *actions_;

> > +    unsigned int left;

> > +    bool last_action;

> > +    size_t comb_size = nl_attr_get_u32(actions); /* Size of combined

> actions */

> > +    *actions_len -= NLA_ALIGN(actions->nla_len);

> > +    actions = nl_attr_next(actions);

> > +

> > +    *comb_start = actions;

> > +    a = (void *) ((uint8_t *) actions + NLA_ALIGN(comb_size));

> > +    left = (*actions_len) - comb_size;

> > +    last_action = (left <= NLA_ALIGN(a->nla_len));

> > +    *actions_ = a;

> > +    *comb_last_action = last_action ? true : false;

> > +    *actions_len = left;

> > +    return comb_size;

> > +}

> > +

> > void

> > odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,

> >                     const struct nlattr *actions, size_t actions_len,

> > @@ -561,7 +586,7 @@ odp_execute_actions(void *dp, struct

> dp_packet_batch *batch, bool steal,

> >     struct dp_packet **packets = batch->packets;

> >     int cnt = batch->count;

> >     const struct nlattr *a;

> > -    unsigned int left;

> > +    size_t left;

> >     int i;

> >

> >     NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { @@

> > -573,7 +598,35 @@ odp_execute_actions(void *dp, struct

> dp_packet_batch *batch, bool steal,

> >                 /* Allow 'dp_execute_action' to steal the packet data if we do

> >                  * not need it any more. */

> >                 bool may_steal = steal && last_action;

> > -

> > +                if ((enum ovs_action_attr) type ==

> > +                                           OVS_ACTION_ATTR_START_COMBINE) {

> > +                    size_t comb_action_len;

> > +                    const struct nlattr *comb_actions = NULL;

> > +                    bool comb_last_action = true;

> > +                    comb_action_len = find_combine_action_end(&a, &left,

> > +                                                            &comb_actions,

> > +                                                            &comb_last_action);

> > +                    if (OVS_UNLIKELY(!comb_action_len)) {

> > +                        /* No combine present, execute current action */

> > +                        goto execute;

> > +                    }

> > +                    if (OVS_LIKELY(comb_last_action)) {

> > +                        odp_execute_actions(dp, batch, 1, comb_actions,

> > +                                        comb_action_len, dp_execute_action);

> > +                        return;

> > +                    }

> > +                    else  {

> > +                        /* Packets to be duplicated for the sub action */

> > +                        struct dp_packet_batch comb_pkt_batch;

> > +                        dp_packet_batch_clone(&comb_pkt_batch, batch);

> > +                        dp_packet_batch_reset_cutlen(batch);

> > +                        odp_execute_actions(dp, &comb_pkt_batch, steal,

> > +                                        comb_actions, comb_action_len,

> > +                                        dp_execute_action);

> > +                    }

> > +                    continue;

> > +                }

> > +execute:

> >                 dp_execute_action(dp, batch, a, may_steal);

> >

> >                 if (last_action) {

> > @@ -683,6 +736,8 @@ odp_execute_actions(void *dp, struct

> dp_packet_batch *batch, bool steal,

> >         case OVS_ACTION_ATTR_RECIRC:

> >         case OVS_ACTION_ATTR_CT:

> >         case OVS_ACTION_ATTR_UNSPEC:

> > +        case OVS_ACTION_ATTR_START_COMBINE:

> > +        case OVS_ACTION_ATTR_END_COMBINE:

> >         case __OVS_ACTION_ATTR_MAX:

> >             OVS_NOT_REACHED();

> >         }

> > diff --git a/lib/odp-util.c b/lib/odp-util.c index 427dd65..9cb8629

> > 100644

> > --- a/lib/odp-util.c

> > +++ b/lib/odp-util.c

> > @@ -121,6 +121,8 @@ odp_action_len(uint16_t type)

> >     case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;

> >     case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;

> >     case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE;

> > +    case OVS_ACTION_ATTR_START_COMBINE: return sizeof(uint32_t);

> > +    case OVS_ACTION_ATTR_END_COMBINE: return 0;

> >

> >     case OVS_ACTION_ATTR_UNSPEC:

> >     case __OVS_ACTION_ATTR_MAX:

> > @@ -509,7 +511,7 @@ format_odp_tnl_push_header(struct ds *ds, struct

> ovs_action_push_tnl *data)

> >                       gnh->oam ? "oam," : "",

> >                       gnh->critical ? "crit," : "",

> >                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);

> > -

> > +

> >         if (gnh->opt_len) {

> >             ds_put_cstr(ds, ",options(");

> >             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,

> > @@ -865,6 +867,17 @@ format_odp_action(struct ds *ds, const struct

> nlattr *a)

> >     case OVS_ACTION_ATTR_CT:

> >         format_odp_conntrack_action(ds, a);

> >         break;

> > +    case OVS_ACTION_ATTR_START_COMBINE: {

> > +        uint32_t comb_len;

> > +        comb_len = nl_attr_get_u32(a);

> > +        if (comb_len) {

> > +            ds_put_cstr(ds,"{");

> > +        }

> > +        break;

> > +    }

> > +    case OVS_ACTION_ATTR_END_COMBINE:

> > +        ds_put_cstr(ds,"}");

> > +        break;

> >     case OVS_ACTION_ATTR_UNSPEC:

> >     case __OVS_ACTION_ATTR_MAX:

> >     default:

> > @@ -873,6 +886,34 @@ format_odp_action(struct ds *ds, const struct

> nlattr *a)

> >     }

> > }

> >

> > +static bool

> > +is_action_combined(const struct nlattr *a, size_t len) {

> > +    bool last_action = false;

> > +    last_action = (len <= NLA_ALIGN(a->nla_len));

> > +    enum ovs_action_attr type = nl_attr_type(a);

> > +    if (type ==  OVS_ACTION_ATTR_START_COMBINE) {

> > +        uint32_t comb_len;

> > +        comb_len = nl_attr_get_u32(a);

> > +        if (comb_len) {

> > +            return true;

> > +        }

> > +    }

> > +    else if (type == OVS_ACTION_ATTR_END_COMBINE) {

> > +        return true;

> > +    }

> > +    if (!last_action) {

> > +        type =  nl_attr_type(nl_attr_next(a));

> > +        if (type == OVS_ACTION_ATTR_END_COMBINE) {

> > +            return true;

> > +        }

> > +    }

> > +    else {

> > +        return true;

> > +    }

> > +    return false;

> > +}

> > +

> > void

> > format_odp_actions(struct ds *ds, const struct nlattr *actions,

> >                    size_t actions_len) @@ -882,10 +923,10 @@

> > format_odp_actions(struct ds *ds, const struct nlattr *actions,

> >         unsigned int left;

> >

> >         NL_ATTR_FOR_EACH (a, left, actions, actions_len) {

> > -            if (a != actions) {

> > +            format_odp_action(ds, a);

> > +            if (!is_action_combined(a, left)) {

> >                 ds_put_char(ds, ',');

> >             }

> > -            format_odp_action(ds, a);

> >         }

> >         if (left) {

> >             int i;

> > @@ -5338,6 +5379,27 @@ odp_put_tunnel_action(const struct flow_tnl

> > *tunnel, }

> >

> > void

> > +odp_put_combine_start_action(struct ofpbuf *odp_actions, uint32_t

> > +action_len) {

> > +    nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_START_COMBINE,

> > +action_len); }

> > +

> > +void

> > +odp_update_combine_start_action(struct nlattr *start_combine,

> > +                                uint32_t combine_size) {

> > +    uint32_t *combine_len;

> > +    combine_len = (uint32_t *)(start_combine + 1);

> > +    *combine_len = combine_size;

> > +}

> > +

> > +void

> > +odp_put_combine_end_action(struct ofpbuf *odp_actions) {

> > +    nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_END_COMBINE); }

> > +

> > +void

> > odp_put_tnl_push_action(struct ofpbuf *odp_actions,

> >                         struct ovs_action_push_tnl *data) { diff --git

> > a/lib/odp-util.h b/lib/odp-util.h index 42011bc..b77faee 100644

> > --- a/lib/odp-util.h

> > +++ b/lib/odp-util.h

> > @@ -308,4 +308,10 @@ void odp_put_tunnel_action(const struct flow_tnl

> > *tunnel,

> >

> > void odp_put_tnl_push_action(struct ofpbuf *odp_actions,

> >                              struct ovs_action_push_tnl *data);

> > +void odp_put_combine_end_action(struct ofpbuf *odp_actions); void

> > +odp_put_combine_start_action(struct ofpbuf *odp_actions,

> > +                                  uint32_t action_len); void

> > +odp_update_combine_start_action(struct nlattr *start_combine,

> > +                                uint32_t combine_size);

> > #endif /* odp-util.h */

> > diff --git a/ofproto/ofproto-dpif-sflow.c

> > b/ofproto/ofproto-dpif-sflow.c index 37992b4..893c1a5 100644

> > --- a/ofproto/ofproto-dpif-sflow.c

> > +++ b/ofproto/ofproto-dpif-sflow.c

> > @@ -1163,6 +1163,8 @@ dpif_sflow_read_actions(const struct flow *flow,

> > 	}

> > 	case OVS_ACTION_ATTR_SAMPLE:

> > 	case OVS_ACTION_ATTR_UNSPEC:

> > +    case OVS_ACTION_ATTR_START_COMBINE:

> > +    case OVS_ACTION_ATTR_END_COMBINE:

> > 	case __OVS_ACTION_ATTR_MAX:

> > 	default:

> > 	    break;

> > diff --git a/ofproto/ofproto-dpif-xlate.c

> > b/ofproto/ofproto-dpif-xlate.c index 2977be5..f26896f 100644

> > --- a/ofproto/ofproto-dpif-xlate.c

> > +++ b/ofproto/ofproto-dpif-xlate.c

> > @@ -410,6 +410,9 @@ const char *xlate_strerror(enum xlate_error error)

> > static void xlate_action_set(struct xlate_ctx *ctx); static void

> > xlate_commit_actions(struct xlate_ctx *ctx);

> >

> > +static int

> > +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev);

> > +

> > static void

> > ctx_trigger_freeze(struct xlate_ctx *ctx) { @@ -2865,7 +2868,22 @@

> > build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,

> >     }

> >     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);

> >     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);

> > +

> > +    struct nlattr *start_combine;

> > +    uint32_t action_size = 0;

> > +    uint32_t push_action_size = 0;

> > +    start_combine = ofpbuf_tail(ctx->odp_actions);

> > +    odp_put_combine_start_action(ctx->odp_actions, 0);

> > +    action_size = ctx->odp_actions->size;

> >     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);

> > +    push_action_size = ctx->odp_actions->size;

> > +    combine_tunnel_actions(ctx, out_dev);

> > +    if (ctx->odp_actions->size > push_action_size) {

> > +        /* Update the combine action only when combined */

> > +        odp_update_combine_start_action(start_combine,

> > +                                    (ctx->odp_actions->size - action_size));

> > +        odp_put_combine_end_action(ctx->odp_actions);

> > +    }

> >     return 0;

> > }

> >

> > @@ -2903,6 +2921,77 @@ xlate_flow_is_protected(const struct xlate_ctx

> *ctx, const struct flow *flow, co

> >             xport_in->xbundle->protected &&

> > xport_out->xbundle->protected); }

> >

> > +static int

> > +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev)

> > +{

> > +    struct flow *flow = &ctx->xin->flow;

> > +    struct flow old_flow = ctx->xin->flow;

> > +    bool old_conntrack = ctx->conntracked;

> > +    bool old_was_mpls = ctx->was_mpls;

> > +    ovs_version_t old_version = ctx->xin->tables_version;

> > +    struct ofpbuf old_stack = ctx->stack;

> > +    union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];

> > +    struct ofpbuf old_action_set = ctx->action_set;

> > +    uint64_t actset_stub[1024 / 8];

> > +    const struct xbridge *xbridge_old = ctx->xbridge;

> > +

> > +    ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);

> > +    ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof

> > + actset_stub);

> > +

> > +    struct xbridge *xbridge = NULL;

> > +

> > +    xbridge = out_dev->xbridge;

> > +

> > +    ctx->xbridge = xbridge;   /* peer bridge replaced*/

> > +    flow->in_port.ofp_port = out_dev->ofp_port; /* peer port replaced */

> > +    flow->metadata = htonll(0);

> > +    memset(&flow->tunnel, 0, sizeof flow->tunnel);

> > +    memset(flow->regs, 0, sizeof flow->regs);

> > +    flow->actset_output = OFPP_UNSET;

> > +    ctx->conntracked = false;

> > +    clear_conntrack(flow);

> > +

> > +    /* The bridge is now known so obtain its table version. */

> > +    ctx->xin->tables_version

> > +        = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);

> > +

> > +    xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);

> > +    if (!ctx->freezing) {

> > +        xlate_action_set(ctx);

> > +    }

> > +    if (ctx->freezing) {

> > +        finish_freezing(ctx);

> > +    }

> > +

> > +    ctx->xin->flow = old_flow;

> > +    ctx->xbridge = xbridge_old;

> > +    ofpbuf_uninit(&ctx->action_set);

> > +    ctx->action_set = old_action_set;

> > +    ofpbuf_uninit(&ctx->stack);

> > +    ctx->stack = old_stack;

> > +

> > +    /* Restore calling bridge's lookup version. */

> > +    ctx->xin->tables_version = old_version;

> > +

> > +    /* The peer bridge popping MPLS should have no effect on the original

> > +     * bridge. */

> > +    ctx->was_mpls = old_was_mpls;

> > +

> > +    /* The peer bridge's conntrack execution should have no effect on the

> > +     * original bridge. */

> > +    ctx->conntracked = old_conntrack;

> > +

> > +    /* The fact that the peer bridge exits (for any reason) does not mean

> > +     * that the original bridge should exit.  Specifically, if the peer

> > +     * bridge freezes translation, the original bridge must continue

> > +     * processing with the original, not the frozen packet! */

> > +    ctx->exit = false;

> > +

> > +    /* Peer bridge errors do not propagate back. */

> > +    ctx->error = XLATE_OK;

> > +

> > +    return 0;

> > +}

> >

> > static void

> > compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,

> > --

> > 2.7.4

> >

> > _______________________________________________

> > dev mailing list

> > dev@openvswitch.org

> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Jarno Rajahalme Jan. 5, 2017, 9:41 p.m. UTC | #3
> On Jan 5, 2017, at 6:57 AM, Chandran, Sugesh <sugesh.chandran@intel.com> wrote:
> 
> Hi Jarno,
> 
> Thank you for looking into the path, 
> Please find my answers inline blow.
> 
> 
> Regards
> _Sugesh
> 
> 
>> -----Original Message-----
>> From: Jarno Rajahalme [mailto:jarno@ovn.org]
>> Sent: Wednesday, January 4, 2017 12:00 AM
>> To: Chandran, Sugesh <sugesh.chandran@intel.com>
>> Cc: zoltan.balogh@ericsson.com; dev@openvswitch.org
>> Subject: Re: [ovs-dev] [PATCH] tunneling: Combining actions to avoid packet
>> recirculation in tunnel encapsulation.
>> 
>> Sugesh,
>> 
>> So the issue is that instead of recirculating on the datapath, we can compute
>> the “recirculation” actions on the tunnel push translation time, as these
>> actions only depend on the tunnel attributes? This explains why the
>> recirculation in this case is unnecessary, and mentioning this in the commit
>> message would make the motivation/correctness easier to understand.
> [Sugesh] Make sense, Will update the commit message accordingly in the v2.
>> 
>> It seems to me that
>> OVS_ACTION_ATTR_START_COMBINE/OVS_ACTION_ATTR_END_COMBINE
>> semantics is to clone the packet and execute the actions in between. A
>> cleaner way of doing this would be with (e.g.) “OVS_ACTION_ATTR_CLONE”
>> that would be a nested attribute, the length of which would span to the end
>> of the nested attributes (i.e., the actions produced by the tunnel output).
>> This would be likely also faster to execute as the end does not need to be
>> searched. Semantically this would be the same as the sample action with
>> probability of 1, but simpler. Obviously the removal of the final cloning could
>> be avoided like now.
> [Sugesh] Sure it make sense to take out the END_COMBINE action.
> I am not very sure to call it as CLONE_ACTION as its not guarantee the packet cloning
> in every time(For eg: the final action case). The new dummy actions are used to inform dp about set of actions that
> are grouped/combined. These actions are two distinct set of actions which are merged to avoid recirculation in dp.
> Can we call it OVS_ACTION_ATTR_COMBINE/OVS_ACTION_ATTR_GROUP/OVS_ACTION_ATTR_SET
> than calling them CLONE.? 

I think CLONE is more understandable than COMBINE, as the semantics is that any nested actions are applied to a clone of the packet, so that none of those actions need to be undone prior to the remaining actions, which see the packet as it was before any of the nested actions. To me none of the alternatives you provided even hint to this semantics, and the fact that the final cloning can be avoided is a lower layer optimization that does not make any semantic difference.

>> 
>> Finally, translating the tunnel output looks a lot like translating patch port
>> output. I wonder if it would be possible to share some of the code via some
>> new helpers rather than duplicating it?
> [Sugesh] Though both share same concept, the fields that used for combine are slightly different.
> Do you think a single helper function for combine on translation will work for both path and tunnel.
> This may need 'ifs' here and there in the function to do some of the field population and validation exclusively either for
> tunneling or  patch port. Do you think its cleaner than having independent functions?
> Anyway we will look into this possibility of combining these functions.

I’d appreciate that. But let me know if it becomes too messy and in the end is not worth it.

  Jarno

>>  Jarno
>> 
>>> On Dec 30, 2016, at 7:36 AM, Sugesh Chandran
>> <sugesh.chandran@intel.com> wrote:
>>> 
>>> Currently the tunnel packets are recirculated in OVS after the tunnel
>>> encapsulation. This patch combines the post tunnel actions with the
>>> tunnel push to avoid unnecessary recirculation.
>>> Two new datapath actions OVS_ATTR_COMBINE_START and
>>> OVS_ATTR_COMBINE_END are added to implement this feature. These
>> dummy
>>> new actions  used as a delimiters for combined action subset.
>>> In our testing this patch improves the VxLAN encapsulation performance
>> upto 30%.
>>> 
>>> Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
>>> Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
>>> Co-authored-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
>>> ---
>>> datapath/linux/compat/include/linux/openvswitch.h |  2 +
>>> lib/dpif-netdev.c                                 | 22 ++----
>>> lib/dpif.c                                        |  2 +
>>> lib/odp-execute.c                                 | 59 ++++++++++++++-
>>> lib/odp-util.c                                    | 68 ++++++++++++++++-
>>> lib/odp-util.h                                    |  6 ++
>>> ofproto/ofproto-dpif-sflow.c                      |  2 +
>>> ofproto/ofproto-dpif-xlate.c                      | 89 +++++++++++++++++++++++
>>> 8 files changed, 228 insertions(+), 22 deletions(-)
>>> 
>>> diff --git a/datapath/linux/compat/include/linux/openvswitch.h
>>> b/datapath/linux/compat/include/linux/openvswitch.h
>>> index 12260d8..4e9c7f7 100644
>>> --- a/datapath/linux/compat/include/linux/openvswitch.h
>>> +++ b/datapath/linux/compat/include/linux/openvswitch.h
>>> @@ -818,6 +818,8 @@ enum ovs_action_attr { #ifndef __KERNEL__
>>> 	OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct
>> ovs_action_push_tnl*/
>>> 	OVS_ACTION_ATTR_TUNNEL_POP,    /* u32 port number. */
>>> +	OVS_ACTION_ATTR_START_COMBINE,
>>> +	OVS_ACTION_ATTR_END_COMBINE,
>>> #endif
>>> 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be
>> accepted
>>> 				       * from userspace. */
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
>>> 0b73056..2e59bd1 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -4430,24 +4430,8 @@ dp_execute_cb(void *aux_, struct
>>> dp_packet_batch *packets_,
>>> 
>>>    case OVS_ACTION_ATTR_TUNNEL_PUSH:
>>>        if (*depth < MAX_RECIRC_DEPTH) {
>>> -            struct dp_packet_batch tnl_pkt;
>>> -            struct dp_packet_batch *orig_packets_ = packets_;
>>> -            int err;
>>> -
>>> -            if (!may_steal) {
>>> -                dp_packet_batch_clone(&tnl_pkt, packets_);
>>> -                packets_ = &tnl_pkt;
>>> -                dp_packet_batch_reset_cutlen(orig_packets_);
>>> -            }
>>> -
>>>            dp_packet_batch_apply_cutlen(packets_);
>>> -
>>> -            err = push_tnl_action(pmd, a, packets_);
>>> -            if (!err) {
>>> -                (*depth)++;
>>> -                dp_netdev_recirculate(pmd, packets_);
>>> -                (*depth)--;
>>> -            }
>>> +            push_tnl_action(pmd, a, packets_);
>>>            return;
>>>        }
>>>        break;
>>> @@ -4597,6 +4581,10 @@ dp_execute_cb(void *aux_, struct
>> dp_packet_batch *packets_,
>>>        break;
>>>    }
>>> 
>>> +    case OVS_ACTION_ATTR_START_COMBINE:
>>> +        break;
>>> +    case OVS_ACTION_ATTR_END_COMBINE:
>>> +        break;
>>>    case OVS_ACTION_ATTR_PUSH_VLAN:
>>>    case OVS_ACTION_ATTR_POP_VLAN:
>>>    case OVS_ACTION_ATTR_PUSH_MPLS:
>>> diff --git a/lib/dpif.c b/lib/dpif.c
>>> index 53958c5..6477f33 100644
>>> --- a/lib/dpif.c
>>> +++ b/lib/dpif.c
>>> @@ -1187,6 +1187,8 @@ dpif_execute_helper_cb(void *aux_, struct
>> dp_packet_batch *packets_,
>>>    case OVS_ACTION_ATTR_SAMPLE:
>>>    case OVS_ACTION_ATTR_TRUNC:
>>>    case OVS_ACTION_ATTR_UNSPEC:
>>> +    case OVS_ACTION_ATTR_START_COMBINE:
>>> +    case OVS_ACTION_ATTR_END_COMBINE:
>>>    case __OVS_ACTION_ATTR_MAX:
>>>        OVS_NOT_REACHED();
>>>    }
>>> diff --git a/lib/odp-execute.c b/lib/odp-execute.c index
>>> 4fcff16..c475f98 100644
>>> --- a/lib/odp-execute.c
>>> +++ b/lib/odp-execute.c
>>> @@ -532,6 +532,8 @@ requires_datapath_assistance(const struct nlattr
>> *a)
>>>    case OVS_ACTION_ATTR_USERSPACE:
>>>    case OVS_ACTION_ATTR_RECIRC:
>>>    case OVS_ACTION_ATTR_CT:
>>> +    case OVS_ACTION_ATTR_START_COMBINE:
>>> +    case OVS_ACTION_ATTR_END_COMBINE:
>>>        return true;
>>> 
>>>    case OVS_ACTION_ATTR_SET:
>>> @@ -553,6 +555,29 @@ requires_datapath_assistance(const struct nlattr
>> *a)
>>>    return false;
>>> }
>>> 
>>> +static inline size_t
>>> +find_combine_action_end(const struct nlattr **actions_, size_t
>> *actions_len,
>>> +                        const struct nlattr **comb_start,
>>> +                        bool *comb_last_action) {
>>> +    const struct nlattr *a;
>>> +    const struct nlattr *actions = *actions_;
>>> +    unsigned int left;
>>> +    bool last_action;
>>> +    size_t comb_size = nl_attr_get_u32(actions); /* Size of combined
>> actions */
>>> +    *actions_len -= NLA_ALIGN(actions->nla_len);
>>> +    actions = nl_attr_next(actions);
>>> +
>>> +    *comb_start = actions;
>>> +    a = (void *) ((uint8_t *) actions + NLA_ALIGN(comb_size));
>>> +    left = (*actions_len) - comb_size;
>>> +    last_action = (left <= NLA_ALIGN(a->nla_len));
>>> +    *actions_ = a;
>>> +    *comb_last_action = last_action ? true : false;
>>> +    *actions_len = left;
>>> +    return comb_size;
>>> +}
>>> +
>>> void
>>> odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
>>>                    const struct nlattr *actions, size_t actions_len,
>>> @@ -561,7 +586,7 @@ odp_execute_actions(void *dp, struct
>> dp_packet_batch *batch, bool steal,
>>>    struct dp_packet **packets = batch->packets;
>>>    int cnt = batch->count;
>>>    const struct nlattr *a;
>>> -    unsigned int left;
>>> +    size_t left;
>>>    int i;
>>> 
>>>    NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { @@
>>> -573,7 +598,35 @@ odp_execute_actions(void *dp, struct
>> dp_packet_batch *batch, bool steal,
>>>                /* Allow 'dp_execute_action' to steal the packet data if we do
>>>                 * not need it any more. */
>>>                bool may_steal = steal && last_action;
>>> -
>>> +                if ((enum ovs_action_attr) type ==
>>> +                                           OVS_ACTION_ATTR_START_COMBINE) {
>>> +                    size_t comb_action_len;
>>> +                    const struct nlattr *comb_actions = NULL;
>>> +                    bool comb_last_action = true;
>>> +                    comb_action_len = find_combine_action_end(&a, &left,
>>> +                                                            &comb_actions,
>>> +                                                            &comb_last_action);
>>> +                    if (OVS_UNLIKELY(!comb_action_len)) {
>>> +                        /* No combine present, execute current action */
>>> +                        goto execute;
>>> +                    }
>>> +                    if (OVS_LIKELY(comb_last_action)) {
>>> +                        odp_execute_actions(dp, batch, 1, comb_actions,
>>> +                                        comb_action_len, dp_execute_action);
>>> +                        return;
>>> +                    }
>>> +                    else  {
>>> +                        /* Packets to be duplicated for the sub action */
>>> +                        struct dp_packet_batch comb_pkt_batch;
>>> +                        dp_packet_batch_clone(&comb_pkt_batch, batch);
>>> +                        dp_packet_batch_reset_cutlen(batch);
>>> +                        odp_execute_actions(dp, &comb_pkt_batch, steal,
>>> +                                        comb_actions, comb_action_len,
>>> +                                        dp_execute_action);
>>> +                    }
>>> +                    continue;
>>> +                }
>>> +execute:
>>>                dp_execute_action(dp, batch, a, may_steal);
>>> 
>>>                if (last_action) {
>>> @@ -683,6 +736,8 @@ odp_execute_actions(void *dp, struct
>> dp_packet_batch *batch, bool steal,
>>>        case OVS_ACTION_ATTR_RECIRC:
>>>        case OVS_ACTION_ATTR_CT:
>>>        case OVS_ACTION_ATTR_UNSPEC:
>>> +        case OVS_ACTION_ATTR_START_COMBINE:
>>> +        case OVS_ACTION_ATTR_END_COMBINE:
>>>        case __OVS_ACTION_ATTR_MAX:
>>>            OVS_NOT_REACHED();
>>>        }
>>> diff --git a/lib/odp-util.c b/lib/odp-util.c index 427dd65..9cb8629
>>> 100644
>>> --- a/lib/odp-util.c
>>> +++ b/lib/odp-util.c
>>> @@ -121,6 +121,8 @@ odp_action_len(uint16_t type)
>>>    case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;
>>>    case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;
>>>    case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE;
>>> +    case OVS_ACTION_ATTR_START_COMBINE: return sizeof(uint32_t);
>>> +    case OVS_ACTION_ATTR_END_COMBINE: return 0;
>>> 
>>>    case OVS_ACTION_ATTR_UNSPEC:
>>>    case __OVS_ACTION_ATTR_MAX:
>>> @@ -509,7 +511,7 @@ format_odp_tnl_push_header(struct ds *ds, struct
>> ovs_action_push_tnl *data)
>>>                      gnh->oam ? "oam," : "",
>>>                      gnh->critical ? "crit," : "",
>>>                      ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
>>> -
>>> +
>>>        if (gnh->opt_len) {
>>>            ds_put_cstr(ds, ",options(");
>>>            format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
>>> @@ -865,6 +867,17 @@ format_odp_action(struct ds *ds, const struct
>> nlattr *a)
>>>    case OVS_ACTION_ATTR_CT:
>>>        format_odp_conntrack_action(ds, a);
>>>        break;
>>> +    case OVS_ACTION_ATTR_START_COMBINE: {
>>> +        uint32_t comb_len;
>>> +        comb_len = nl_attr_get_u32(a);
>>> +        if (comb_len) {
>>> +            ds_put_cstr(ds,"{");
>>> +        }
>>> +        break;
>>> +    }
>>> +    case OVS_ACTION_ATTR_END_COMBINE:
>>> +        ds_put_cstr(ds,"}");
>>> +        break;
>>>    case OVS_ACTION_ATTR_UNSPEC:
>>>    case __OVS_ACTION_ATTR_MAX:
>>>    default:
>>> @@ -873,6 +886,34 @@ format_odp_action(struct ds *ds, const struct
>> nlattr *a)
>>>    }
>>> }
>>> 
>>> +static bool
>>> +is_action_combined(const struct nlattr *a, size_t len) {
>>> +    bool last_action = false;
>>> +    last_action = (len <= NLA_ALIGN(a->nla_len));
>>> +    enum ovs_action_attr type = nl_attr_type(a);
>>> +    if (type ==  OVS_ACTION_ATTR_START_COMBINE) {
>>> +        uint32_t comb_len;
>>> +        comb_len = nl_attr_get_u32(a);
>>> +        if (comb_len) {
>>> +            return true;
>>> +        }
>>> +    }
>>> +    else if (type == OVS_ACTION_ATTR_END_COMBINE) {
>>> +        return true;
>>> +    }
>>> +    if (!last_action) {
>>> +        type =  nl_attr_type(nl_attr_next(a));
>>> +        if (type == OVS_ACTION_ATTR_END_COMBINE) {
>>> +            return true;
>>> +        }
>>> +    }
>>> +    else {
>>> +        return true;
>>> +    }
>>> +    return false;
>>> +}
>>> +
>>> void
>>> format_odp_actions(struct ds *ds, const struct nlattr *actions,
>>>                   size_t actions_len) @@ -882,10 +923,10 @@
>>> format_odp_actions(struct ds *ds, const struct nlattr *actions,
>>>        unsigned int left;
>>> 
>>>        NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
>>> -            if (a != actions) {
>>> +            format_odp_action(ds, a);
>>> +            if (!is_action_combined(a, left)) {
>>>                ds_put_char(ds, ',');
>>>            }
>>> -            format_odp_action(ds, a);
>>>        }
>>>        if (left) {
>>>            int i;
>>> @@ -5338,6 +5379,27 @@ odp_put_tunnel_action(const struct flow_tnl
>>> *tunnel, }
>>> 
>>> void
>>> +odp_put_combine_start_action(struct ofpbuf *odp_actions, uint32_t
>>> +action_len) {
>>> +    nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_START_COMBINE,
>>> +action_len); }
>>> +
>>> +void
>>> +odp_update_combine_start_action(struct nlattr *start_combine,
>>> +                                uint32_t combine_size) {
>>> +    uint32_t *combine_len;
>>> +    combine_len = (uint32_t *)(start_combine + 1);
>>> +    *combine_len = combine_size;
>>> +}
>>> +
>>> +void
>>> +odp_put_combine_end_action(struct ofpbuf *odp_actions) {
>>> +    nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_END_COMBINE); }
>>> +
>>> +void
>>> odp_put_tnl_push_action(struct ofpbuf *odp_actions,
>>>                        struct ovs_action_push_tnl *data) { diff --git
>>> a/lib/odp-util.h b/lib/odp-util.h index 42011bc..b77faee 100644
>>> --- a/lib/odp-util.h
>>> +++ b/lib/odp-util.h
>>> @@ -308,4 +308,10 @@ void odp_put_tunnel_action(const struct flow_tnl
>>> *tunnel,
>>> 
>>> void odp_put_tnl_push_action(struct ofpbuf *odp_actions,
>>>                             struct ovs_action_push_tnl *data);
>>> +void odp_put_combine_end_action(struct ofpbuf *odp_actions); void
>>> +odp_put_combine_start_action(struct ofpbuf *odp_actions,
>>> +                                  uint32_t action_len); void
>>> +odp_update_combine_start_action(struct nlattr *start_combine,
>>> +                                uint32_t combine_size);
>>> #endif /* odp-util.h */
>>> diff --git a/ofproto/ofproto-dpif-sflow.c
>>> b/ofproto/ofproto-dpif-sflow.c index 37992b4..893c1a5 100644
>>> --- a/ofproto/ofproto-dpif-sflow.c
>>> +++ b/ofproto/ofproto-dpif-sflow.c
>>> @@ -1163,6 +1163,8 @@ dpif_sflow_read_actions(const struct flow *flow,
>>> 	}
>>> 	case OVS_ACTION_ATTR_SAMPLE:
>>> 	case OVS_ACTION_ATTR_UNSPEC:
>>> +    case OVS_ACTION_ATTR_START_COMBINE:
>>> +    case OVS_ACTION_ATTR_END_COMBINE:
>>> 	case __OVS_ACTION_ATTR_MAX:
>>> 	default:
>>> 	    break;
>>> diff --git a/ofproto/ofproto-dpif-xlate.c
>>> b/ofproto/ofproto-dpif-xlate.c index 2977be5..f26896f 100644
>>> --- a/ofproto/ofproto-dpif-xlate.c
>>> +++ b/ofproto/ofproto-dpif-xlate.c
>>> @@ -410,6 +410,9 @@ const char *xlate_strerror(enum xlate_error error)
>>> static void xlate_action_set(struct xlate_ctx *ctx); static void
>>> xlate_commit_actions(struct xlate_ctx *ctx);
>>> 
>>> +static int
>>> +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev);
>>> +
>>> static void
>>> ctx_trigger_freeze(struct xlate_ctx *ctx) { @@ -2865,7 +2868,22 @@
>>> build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
>>>    }
>>>    tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
>>>    tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
>>> +
>>> +    struct nlattr *start_combine;
>>> +    uint32_t action_size = 0;
>>> +    uint32_t push_action_size = 0;
>>> +    start_combine = ofpbuf_tail(ctx->odp_actions);
>>> +    odp_put_combine_start_action(ctx->odp_actions, 0);
>>> +    action_size = ctx->odp_actions->size;
>>>    odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
>>> +    push_action_size = ctx->odp_actions->size;
>>> +    combine_tunnel_actions(ctx, out_dev);
>>> +    if (ctx->odp_actions->size > push_action_size) {
>>> +        /* Update the combine action only when combined */
>>> +        odp_update_combine_start_action(start_combine,
>>> +                                    (ctx->odp_actions->size - action_size));
>>> +        odp_put_combine_end_action(ctx->odp_actions);
>>> +    }
>>>    return 0;
>>> }
>>> 
>>> @@ -2903,6 +2921,77 @@ xlate_flow_is_protected(const struct xlate_ctx
>> *ctx, const struct flow *flow, co
>>>            xport_in->xbundle->protected &&
>>> xport_out->xbundle->protected); }
>>> 
>>> +static int
>>> +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev)
>>> +{
>>> +    struct flow *flow = &ctx->xin->flow;
>>> +    struct flow old_flow = ctx->xin->flow;
>>> +    bool old_conntrack = ctx->conntracked;
>>> +    bool old_was_mpls = ctx->was_mpls;
>>> +    ovs_version_t old_version = ctx->xin->tables_version;
>>> +    struct ofpbuf old_stack = ctx->stack;
>>> +    union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
>>> +    struct ofpbuf old_action_set = ctx->action_set;
>>> +    uint64_t actset_stub[1024 / 8];
>>> +    const struct xbridge *xbridge_old = ctx->xbridge;
>>> +
>>> +    ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
>>> +    ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof
>>> + actset_stub);
>>> +
>>> +    struct xbridge *xbridge = NULL;
>>> +
>>> +    xbridge = out_dev->xbridge;
>>> +
>>> +    ctx->xbridge = xbridge;   /* peer bridge replaced*/
>>> +    flow->in_port.ofp_port = out_dev->ofp_port; /* peer port replaced */
>>> +    flow->metadata = htonll(0);
>>> +    memset(&flow->tunnel, 0, sizeof flow->tunnel);
>>> +    memset(flow->regs, 0, sizeof flow->regs);
>>> +    flow->actset_output = OFPP_UNSET;
>>> +    ctx->conntracked = false;
>>> +    clear_conntrack(flow);
>>> +
>>> +    /* The bridge is now known so obtain its table version. */
>>> +    ctx->xin->tables_version
>>> +        = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
>>> +
>>> +    xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
>>> +    if (!ctx->freezing) {
>>> +        xlate_action_set(ctx);
>>> +    }
>>> +    if (ctx->freezing) {
>>> +        finish_freezing(ctx);
>>> +    }
>>> +
>>> +    ctx->xin->flow = old_flow;
>>> +    ctx->xbridge = xbridge_old;
>>> +    ofpbuf_uninit(&ctx->action_set);
>>> +    ctx->action_set = old_action_set;
>>> +    ofpbuf_uninit(&ctx->stack);
>>> +    ctx->stack = old_stack;
>>> +
>>> +    /* Restore calling bridge's lookup version. */
>>> +    ctx->xin->tables_version = old_version;
>>> +
>>> +    /* The peer bridge popping MPLS should have no effect on the original
>>> +     * bridge. */
>>> +    ctx->was_mpls = old_was_mpls;
>>> +
>>> +    /* The peer bridge's conntrack execution should have no effect on the
>>> +     * original bridge. */
>>> +    ctx->conntracked = old_conntrack;
>>> +
>>> +    /* The fact that the peer bridge exits (for any reason) does not mean
>>> +     * that the original bridge should exit.  Specifically, if the peer
>>> +     * bridge freezes translation, the original bridge must continue
>>> +     * processing with the original, not the frozen packet! */
>>> +    ctx->exit = false;
>>> +
>>> +    /* Peer bridge errors do not propagate back. */
>>> +    ctx->error = XLATE_OK;
>>> +
>>> +    return 0;
>>> +}
>>> 
>>> static void
>>> compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
>>> --
>>> 2.7.4
>>> 
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox

Patch

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 12260d8..4e9c7f7 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -818,6 +818,8 @@  enum ovs_action_attr {
 #ifndef __KERNEL__
 	OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct ovs_action_push_tnl*/
 	OVS_ACTION_ATTR_TUNNEL_POP,    /* u32 port number. */
+	OVS_ACTION_ATTR_START_COMBINE,
+	OVS_ACTION_ATTR_END_COMBINE,
 #endif
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0b73056..2e59bd1 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4430,24 +4430,8 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
 
     case OVS_ACTION_ATTR_TUNNEL_PUSH:
         if (*depth < MAX_RECIRC_DEPTH) {
-            struct dp_packet_batch tnl_pkt;
-            struct dp_packet_batch *orig_packets_ = packets_;
-            int err;
-
-            if (!may_steal) {
-                dp_packet_batch_clone(&tnl_pkt, packets_);
-                packets_ = &tnl_pkt;
-                dp_packet_batch_reset_cutlen(orig_packets_);
-            }
-
             dp_packet_batch_apply_cutlen(packets_);
-
-            err = push_tnl_action(pmd, a, packets_);
-            if (!err) {
-                (*depth)++;
-                dp_netdev_recirculate(pmd, packets_);
-                (*depth)--;
-            }
+            push_tnl_action(pmd, a, packets_);
             return;
         }
         break;
@@ -4597,6 +4581,10 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         break;
     }
 
+    case OVS_ACTION_ATTR_START_COMBINE:
+        break;
+    case OVS_ACTION_ATTR_END_COMBINE:
+        break;
     case OVS_ACTION_ATTR_PUSH_VLAN:
     case OVS_ACTION_ATTR_POP_VLAN:
     case OVS_ACTION_ATTR_PUSH_MPLS:
diff --git a/lib/dpif.c b/lib/dpif.c
index 53958c5..6477f33 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1187,6 +1187,8 @@  dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
     case OVS_ACTION_ATTR_SAMPLE:
     case OVS_ACTION_ATTR_TRUNC:
     case OVS_ACTION_ATTR_UNSPEC:
+    case OVS_ACTION_ATTR_START_COMBINE:
+    case OVS_ACTION_ATTR_END_COMBINE:
     case __OVS_ACTION_ATTR_MAX:
         OVS_NOT_REACHED();
     }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 4fcff16..c475f98 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -532,6 +532,8 @@  requires_datapath_assistance(const struct nlattr *a)
     case OVS_ACTION_ATTR_USERSPACE:
     case OVS_ACTION_ATTR_RECIRC:
     case OVS_ACTION_ATTR_CT:
+    case OVS_ACTION_ATTR_START_COMBINE:
+    case OVS_ACTION_ATTR_END_COMBINE:
         return true;
 
     case OVS_ACTION_ATTR_SET:
@@ -553,6 +555,29 @@  requires_datapath_assistance(const struct nlattr *a)
     return false;
 }
 
+static inline size_t
+find_combine_action_end(const struct nlattr **actions_, size_t *actions_len,
+                        const struct nlattr **comb_start,
+                        bool *comb_last_action)
+{
+    const struct nlattr *a;
+    const struct nlattr *actions = *actions_;
+    unsigned int left;
+    bool last_action;
+    size_t comb_size = nl_attr_get_u32(actions); /* Size of combined actions */
+    *actions_len -= NLA_ALIGN(actions->nla_len);
+    actions = nl_attr_next(actions);
+
+    *comb_start = actions;
+    a = (void *) ((uint8_t *) actions + NLA_ALIGN(comb_size));
+    left = (*actions_len) - comb_size;
+    last_action = (left <= NLA_ALIGN(a->nla_len));
+    *actions_ = a;
+    *comb_last_action = last_action ? true : false;
+    *actions_len = left;
+    return comb_size;
+}
+
 void
 odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
                     const struct nlattr *actions, size_t actions_len,
@@ -561,7 +586,7 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
     struct dp_packet **packets = batch->packets;
     int cnt = batch->count;
     const struct nlattr *a;
-    unsigned int left;
+    size_t left;
     int i;
 
     NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
@@ -573,7 +598,35 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
                 /* Allow 'dp_execute_action' to steal the packet data if we do
                  * not need it any more. */
                 bool may_steal = steal && last_action;
-
+                if ((enum ovs_action_attr) type ==
+                                           OVS_ACTION_ATTR_START_COMBINE) {
+                    size_t comb_action_len;
+                    const struct nlattr *comb_actions = NULL;
+                    bool comb_last_action = true;
+                    comb_action_len = find_combine_action_end(&a, &left,
+                                                            &comb_actions,
+                                                            &comb_last_action);
+                    if (OVS_UNLIKELY(!comb_action_len)) {
+                        /* No combine present, execute current action */
+                        goto execute;
+                    }
+                    if (OVS_LIKELY(comb_last_action)) {
+                        odp_execute_actions(dp, batch, 1, comb_actions,
+                                        comb_action_len, dp_execute_action);
+                        return;
+                    }
+                    else  {
+                        /* Packets to be duplicated for the sub action */
+                        struct dp_packet_batch comb_pkt_batch;
+                        dp_packet_batch_clone(&comb_pkt_batch, batch);
+                        dp_packet_batch_reset_cutlen(batch);
+                        odp_execute_actions(dp, &comb_pkt_batch, steal,
+                                        comb_actions, comb_action_len,
+                                        dp_execute_action);
+                    }
+                    continue;
+                }
+execute:
                 dp_execute_action(dp, batch, a, may_steal);
 
                 if (last_action) {
@@ -683,6 +736,8 @@  odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
         case OVS_ACTION_ATTR_RECIRC:
         case OVS_ACTION_ATTR_CT:
         case OVS_ACTION_ATTR_UNSPEC:
+        case OVS_ACTION_ATTR_START_COMBINE:
+        case OVS_ACTION_ATTR_END_COMBINE:
         case __OVS_ACTION_ATTR_MAX:
             OVS_NOT_REACHED();
         }
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 427dd65..9cb8629 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -121,6 +121,8 @@  odp_action_len(uint16_t type)
     case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE;
     case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE;
     case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE;
+    case OVS_ACTION_ATTR_START_COMBINE: return sizeof(uint32_t);
+    case OVS_ACTION_ATTR_END_COMBINE: return 0;
 
     case OVS_ACTION_ATTR_UNSPEC:
     case __OVS_ACTION_ATTR_MAX:
@@ -509,7 +511,7 @@  format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
                       gnh->oam ? "oam," : "",
                       gnh->critical ? "crit," : "",
                       ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
- 
+
         if (gnh->opt_len) {
             ds_put_cstr(ds, ",options(");
             format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4,
@@ -865,6 +867,17 @@  format_odp_action(struct ds *ds, const struct nlattr *a)
     case OVS_ACTION_ATTR_CT:
         format_odp_conntrack_action(ds, a);
         break;
+    case OVS_ACTION_ATTR_START_COMBINE: {
+        uint32_t comb_len;
+        comb_len = nl_attr_get_u32(a);
+        if (comb_len) {
+            ds_put_cstr(ds,"{");
+        }
+        break;
+    }
+    case OVS_ACTION_ATTR_END_COMBINE:
+        ds_put_cstr(ds,"}");
+        break;
     case OVS_ACTION_ATTR_UNSPEC:
     case __OVS_ACTION_ATTR_MAX:
     default:
@@ -873,6 +886,34 @@  format_odp_action(struct ds *ds, const struct nlattr *a)
     }
 }
 
+static bool
+is_action_combined(const struct nlattr *a, size_t len)
+{
+    bool last_action = false;
+    last_action = (len <= NLA_ALIGN(a->nla_len));
+    enum ovs_action_attr type = nl_attr_type(a);
+    if (type ==  OVS_ACTION_ATTR_START_COMBINE) {
+        uint32_t comb_len;
+        comb_len = nl_attr_get_u32(a);
+        if (comb_len) {
+            return true;
+        }
+    }
+    else if (type == OVS_ACTION_ATTR_END_COMBINE) {
+        return true;
+    }
+    if (!last_action) {
+        type =  nl_attr_type(nl_attr_next(a));
+        if (type == OVS_ACTION_ATTR_END_COMBINE) {
+            return true;
+        }
+    }
+    else {
+        return true;
+    }
+    return false;
+}
+
 void
 format_odp_actions(struct ds *ds, const struct nlattr *actions,
                    size_t actions_len)
@@ -882,10 +923,10 @@  format_odp_actions(struct ds *ds, const struct nlattr *actions,
         unsigned int left;
 
         NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
-            if (a != actions) {
+            format_odp_action(ds, a);
+            if (!is_action_combined(a, left)) {
                 ds_put_char(ds, ',');
             }
-            format_odp_action(ds, a);
         }
         if (left) {
             int i;
@@ -5338,6 +5379,27 @@  odp_put_tunnel_action(const struct flow_tnl *tunnel,
 }
 
 void
+odp_put_combine_start_action(struct ofpbuf *odp_actions, uint32_t action_len)
+{
+    nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_START_COMBINE, action_len);
+}
+
+void
+odp_update_combine_start_action(struct nlattr *start_combine,
+                                uint32_t combine_size)
+{
+    uint32_t *combine_len;
+    combine_len = (uint32_t *)(start_combine + 1);
+    *combine_len = combine_size;
+}
+
+void
+odp_put_combine_end_action(struct ofpbuf *odp_actions)
+{
+    nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_END_COMBINE);
+}
+
+void
 odp_put_tnl_push_action(struct ofpbuf *odp_actions,
                         struct ovs_action_push_tnl *data)
 {
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 42011bc..b77faee 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -308,4 +308,10 @@  void odp_put_tunnel_action(const struct flow_tnl *tunnel,
 
 void odp_put_tnl_push_action(struct ofpbuf *odp_actions,
                              struct ovs_action_push_tnl *data);
+void odp_put_combine_end_action(struct ofpbuf *odp_actions);
+void odp_put_combine_start_action(struct ofpbuf *odp_actions,
+                                  uint32_t action_len);
+void
+odp_update_combine_start_action(struct nlattr *start_combine,
+                                uint32_t combine_size);
 #endif /* odp-util.h */
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index 37992b4..893c1a5 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -1163,6 +1163,8 @@  dpif_sflow_read_actions(const struct flow *flow,
 	}
 	case OVS_ACTION_ATTR_SAMPLE:
 	case OVS_ACTION_ATTR_UNSPEC:
+    case OVS_ACTION_ATTR_START_COMBINE:
+    case OVS_ACTION_ATTR_END_COMBINE:
 	case __OVS_ACTION_ATTR_MAX:
 	default:
 	    break;
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 2977be5..f26896f 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -410,6 +410,9 @@  const char *xlate_strerror(enum xlate_error error)
 static void xlate_action_set(struct xlate_ctx *ctx);
 static void xlate_commit_actions(struct xlate_ctx *ctx);
 
+static int
+combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev);
+
 static void
 ctx_trigger_freeze(struct xlate_ctx *ctx)
 {
@@ -2865,7 +2868,22 @@  build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
     }
     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
+
+    struct nlattr *start_combine;
+    uint32_t action_size = 0;
+    uint32_t push_action_size = 0;
+    start_combine = ofpbuf_tail(ctx->odp_actions);
+    odp_put_combine_start_action(ctx->odp_actions, 0);
+    action_size = ctx->odp_actions->size;
     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
+    push_action_size = ctx->odp_actions->size;
+    combine_tunnel_actions(ctx, out_dev);
+    if (ctx->odp_actions->size > push_action_size) {
+        /* Update the combine action only when combined */
+        odp_update_combine_start_action(start_combine,
+                                    (ctx->odp_actions->size - action_size));
+        odp_put_combine_end_action(ctx->odp_actions);
+    }
     return 0;
 }
 
@@ -2903,6 +2921,77 @@  xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co
             xport_in->xbundle->protected && xport_out->xbundle->protected);
 }
 
+static int
+combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev)
+{
+    struct flow *flow = &ctx->xin->flow;
+    struct flow old_flow = ctx->xin->flow;
+    bool old_conntrack = ctx->conntracked;
+    bool old_was_mpls = ctx->was_mpls;
+    ovs_version_t old_version = ctx->xin->tables_version;
+    struct ofpbuf old_stack = ctx->stack;
+    union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
+    struct ofpbuf old_action_set = ctx->action_set;
+    uint64_t actset_stub[1024 / 8];
+    const struct xbridge *xbridge_old = ctx->xbridge;
+
+    ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
+    ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
+
+    struct xbridge *xbridge = NULL;
+
+    xbridge = out_dev->xbridge;
+
+    ctx->xbridge = xbridge;   /* peer bridge replaced*/
+    flow->in_port.ofp_port = out_dev->ofp_port; /* peer port replaced */
+    flow->metadata = htonll(0);
+    memset(&flow->tunnel, 0, sizeof flow->tunnel);
+    memset(flow->regs, 0, sizeof flow->regs);
+    flow->actset_output = OFPP_UNSET;
+    ctx->conntracked = false;
+    clear_conntrack(flow);
+
+    /* The bridge is now known so obtain its table version. */
+    ctx->xin->tables_version
+        = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
+
+    xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
+    if (!ctx->freezing) {
+        xlate_action_set(ctx);
+    }
+    if (ctx->freezing) {
+        finish_freezing(ctx);
+    }
+
+    ctx->xin->flow = old_flow;
+    ctx->xbridge = xbridge_old;
+    ofpbuf_uninit(&ctx->action_set);
+    ctx->action_set = old_action_set;
+    ofpbuf_uninit(&ctx->stack);
+    ctx->stack = old_stack;
+
+    /* Restore calling bridge's lookup version. */
+    ctx->xin->tables_version = old_version;
+
+    /* The peer bridge popping MPLS should have no effect on the original
+     * bridge. */
+    ctx->was_mpls = old_was_mpls;
+
+    /* The peer bridge's conntrack execution should have no effect on the
+     * original bridge. */
+    ctx->conntracked = old_conntrack;
+
+    /* The fact that the peer bridge exits (for any reason) does not mean
+     * that the original bridge should exit.  Specifically, if the peer
+     * bridge freezes translation, the original bridge must continue
+     * processing with the original, not the frozen packet! */
+    ctx->exit = false;
+
+    /* Peer bridge errors do not propagate back. */
+    ctx->error = XLATE_OK;
+
+    return 0;
+}
 
 static void
 compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,