diff mbox

[ovs-dev] xlate: Skip recirculation for output and set actions

Message ID 1464074966-28774-1-git-send-email-simon.horman@netronome.com
State Superseded
Headers show

Commit Message

Simon Horman May 24, 2016, 7:29 a.m. UTC
Until 8bf009bf8ab4 ("xlate: Always recirculate after an MPLS POP to a
non-MPLS ethertype.") the translation code took some care to only
recirculate as a result of a pop_mpls action if necessary. This was
implemented using per-action checks and resulted in some maintenance
burden.

Unfortunately recirculation is a relatively expensive operation and a
performance degradation of up to 35% has been observed with the above
mentioned patch applied for the arguably common case of:

	pop_mpls,set(l2 field),output

This patch attempts to strike a balance between performance and
maintainability by special casing set and output actions such
that recirculation may be avoided.

This partially reverts the above mentioned commit. In particular most
of the C code outside of do_xlate_actions().

Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
* Lightly tested using test-suite portion of this patch

v2
* Trigger recirculation for select groups as bucket selection
  may access fields above L2.5.
* Add test to exercise that recirculation is triggered for patch ports
---
 ofproto/ofproto-dpif-xlate.c | 117 ++++++++++++++++++++++++++++++++++++++++++-
 tests/mpls-xlate.at          |  78 +++++++++++++++++++++++++++--
 tests/ofproto-dpif.at        |   3 +-
 3 files changed, 191 insertions(+), 7 deletions(-)

Comments

Jarno Rajahalme May 24, 2016, 7:36 p.m. UTC | #1
One comment below, otherwise looks good,

Acked-by: Jarno Rajahalme <jarno@ovn.org>


> On May 24, 2016, at 12:29 AM, Simon Horman <simon.horman@netronome.com> wrote:
> 
> Until 8bf009bf8ab4 ("xlate: Always recirculate after an MPLS POP to a
> non-MPLS ethertype.") the translation code took some care to only
> recirculate as a result of a pop_mpls action if necessary. This was
> implemented using per-action checks and resulted in some maintenance
> burden.
> 
> Unfortunately recirculation is a relatively expensive operation and a
> performance degradation of up to 35% has been observed with the above
> mentioned patch applied for the arguably common case of:
> 
> 	pop_mpls,set(l2 field),output
> 
> This patch attempts to strike a balance between performance and
> maintainability by special casing set and output actions such
> that recirculation may be avoided.
> 
> This partially reverts the above mentioned commit. In particular most
> of the C code outside of do_xlate_actions().
> 
> Signed-off-by: Simon Horman <simon.horman@netronome.com>
> ---
> * Lightly tested using test-suite portion of this patch
> 
> v2
> * Trigger recirculation for select groups as bucket selection
>  may access fields above L2.5.
> * Add test to exercise that recirculation is triggered for patch ports
> ---
> ofproto/ofproto-dpif-xlate.c | 117 ++++++++++++++++++++++++++++++++++++++++++-
> tests/mpls-xlate.at          |  78 +++++++++++++++++++++++++++--
> tests/ofproto-dpif.at        |   3 +-
> 3 files changed, 191 insertions(+), 7 deletions(-)
> 
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index af26c64fcc39..025333ba56eb 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -349,6 +349,12 @@ struct xlate_ctx {
>     struct ofpbuf frozen_actions;
>     const struct ofpact_controller *pause;
> 
> +    /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
> +     * This is a trigger for recirculation in cases where translating an action
> +     * or looking up a flow requires access to the fields of the packet after
> +     * the MPLS label stack that was originally present. */
> +    bool was_mpls;
> +
>     /* True if conntrack has been performed on this packet during processing
>      * on the current bridge. This is used to determine whether conntrack
>      * state from the datapath should be honored after thawing. */
> @@ -3029,6 +3035,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
>         const struct xport *peer = xport->peer;
>         struct flow old_flow = ctx->xin->flow;
>         bool old_conntrack = ctx->conntracked;
> +        bool old_was_mpls = ctx->was_mpls;
>         cls_version_t old_version = ctx->tables_version;
>         struct ofpbuf old_stack = ctx->stack;
>         union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
> @@ -3086,6 +3093,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
>         /* Restore calling bridge's lookup version. */
>         ctx->tables_version = old_version;
> 
> +        /* The peer bridge popping MPLS should have no effect on the original
> +         * bridge. */
> +        ctx->was_mpls = old_was_mpls;
> +
>         /* The peer bridge's conntrack execution should have no effect on the
>          * original bridge. */
>         ctx->conntracked = old_conntrack;
> @@ -3294,6 +3305,11 @@ static void
> xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
>                    bool may_packet_in, bool honor_table_miss)
> {
> +    /* Check if we need to recirculate before matching in a table. */
> +    if (ctx->was_mpls) {
> +        ctx_trigger_freeze(ctx);
> +        return;
> +    }
>     if (xlate_resubmit_resource_check(ctx)) {
>         uint8_t old_table_id = ctx->table_id;
>         struct rule_dpif *rule;
> @@ -3355,6 +3371,7 @@ xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
>     struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
>                                                         bucket->ofpacts_len);
>     struct flow old_flow = ctx->xin->flow;
> +    bool old_was_mpls = ctx->was_mpls;
> 
>     ofpacts_execute_action_set(&action_list, &action_set);
>     ctx->indentation++;
> @@ -3386,6 +3403,10 @@ xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
>      * group buckets. */
>     ctx->xin->flow = old_flow;
> 
> +    /* The group bucket popping MPLS should have no effect after bucket
> +     * execution. */
> +    ctx->was_mpls = old_was_mpls;
> +
>     /* The fact that the group bucket exits (for any reason) does not mean that
>      * the translation after the group action should exit.  Specifically, if
>      * the group bucket freezes translation, the actions after the group action
> @@ -3509,6 +3530,13 @@ xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
> {
>     const char *selection_method = group_dpif_get_selection_method(group);
> 
> +    /* Select groups may access flow keys beyond L2 in order to
> +     * select a bucket. Recirculate as appropriate to make this possible.
> +     */
> +    if (ctx->was_mpls) {
> +        ctx_trigger_freeze(ctx);
> +    }
> +
>     if (selection_method[0] == '\0') {
>         xlate_default_select_group(ctx, group);
>     } else if (!strcasecmp("hash", selection_method)) {
> @@ -3802,7 +3830,7 @@ compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
> 
>     if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
>         if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
> -            ctx_trigger_freeze(ctx);
> +            ctx->was_mpls = true;
>         }
>     } else if (n >= FLOW_MAX_MPLS_LABELS) {
>         if (ctx->xin->packet != NULL) {
> @@ -4478,6 +4506,90 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
> }
> 
> static void
> +recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
> +{
> +    /* Do not consider recirculating unless the packet was previously MPLS. */
> +    if (!ctx->was_mpls) {
> +        return;
> +    }
> +
> +    /* Special case these actions, only recirculating if necessary.
> +     * This avoids the overhead of recirculation in common use-cases.
> +     */
> +    switch (a->type) {
> +
> +    /* Output actions  do not require recirculation. */
> +    case OFPACT_OUTPUT:
> +    case OFPACT_ENQUEUE:
> +    case OFPACT_OUTPUT_REG:
> +    /* Set actions that don't touch L3+ fields do not require recirculation. */
> +    case OFPACT_SET_VLAN_VID:
> +    case OFPACT_SET_VLAN_PCP:
> +    case OFPACT_SET_ETH_SRC:
> +    case OFPACT_SET_ETH_DST:
> +    case OFPACT_SET_TUNNEL:
> +    case OFPACT_SET_QUEUE:
> +    /* If actions of a group require recirculation that can be detected
> +     * when translating them. */
> +    case OFPACT_GROUP:
> +        return;
> +
> +    /* Set field that don't touch L3+ fields don't require recirculation. */
> +    case OFPACT_SET_FIELD:
> +        if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) {
> +            break;
> +        }
> +        return;
> +
> +    /* For simplicity, recirculate in all other cases. */
> +    case OFPACT_CONTROLLER:
> +    case OFPACT_BUNDLE:
> +    case OFPACT_STRIP_VLAN:
> +    case OFPACT_PUSH_VLAN:
> +    case OFPACT_SET_IPV4_SRC:
> +    case OFPACT_SET_IPV4_DST:
> +    case OFPACT_SET_IP_DSCP:
> +    case OFPACT_SET_IP_ECN:
> +    case OFPACT_SET_IP_TTL:
> +    case OFPACT_SET_L4_SRC_PORT:
> +    case OFPACT_SET_L4_DST_PORT:
> +    case OFPACT_REG_MOVE:
> +    case OFPACT_STACK_PUSH:
> +    case OFPACT_STACK_POP:
> +    case OFPACT_DEC_TTL:
> +    case OFPACT_SET_MPLS_LABEL:
> +    case OFPACT_SET_MPLS_TC:
> +    case OFPACT_SET_MPLS_TTL:
> +    case OFPACT_DEC_MPLS_TTL:
> +    case OFPACT_PUSH_MPLS:
> +    case OFPACT_POP_MPLS:
> +    case OFPACT_POP_QUEUE:
> +    case OFPACT_FIN_TIMEOUT:
> +    case OFPACT_RESUBMIT:
> +    case OFPACT_LEARN:
> +    case OFPACT_CONJUNCTION:
> +    case OFPACT_MULTIPATH:
> +    case OFPACT_NOTE:
> +    case OFPACT_EXIT:
> +    case OFPACT_SAMPLE:
> +    case OFPACT_UNROLL_XLATE:
> +    case OFPACT_CT:
> +    case OFPACT_NAT:
> +    case OFPACT_DEBUG_RECIRC:
> +    case OFPACT_METER:
> +    case OFPACT_CLEAR_ACTIONS:
> +    case OFPACT_WRITE_ACTIONS:
> +    case OFPACT_WRITE_METADATA:
> +    case OFPACT_GOTO_TABLE:
> +    default:
> +        break;
> +    }
> +
> +    /* Recirculate */
> +    ctx_trigger_freeze(ctx);
> +}
> +
> +static void
> do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>                  struct xlate_ctx *ctx)
> {
> @@ -4500,6 +4612,8 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>             break;
>         }
> 
> +        recirc_for_mpls(a, ctx);
> +

IMO this should not be done when 'ctx->exit' has already been set, as it may trigger an unnecessary recirculation in that case.

  Jarno

>         if (ctx->exit) {
>             /* Check if need to store the remaining actions for later
>              * execution. */
> @@ -5151,6 +5265,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
>         .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
>         .pause = NULL,
> 
> +        .was_mpls = false,
>         .conntracked = false,
> 
>         .ct_nat_action = NULL,
> diff --git a/tests/mpls-xlate.at b/tests/mpls-xlate.at
> index 7df52f513e7e..a9a3cf53bed3 100644
> --- a/tests/mpls-xlate.at
> +++ b/tests/mpls-xlate.at
> @@ -2,18 +2,43 @@ AT_BANNER([mpls-xlate])
> 
> AT_SETUP([MPLS xlate action])
> 
> -OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1])
> +OVS_VSWITCHD_START(
> +  [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \
> +   add-port br0 p1 -- set Interface p1 type=patch \
> +                                       options:peer=p2 ofport_request=2 -- \
> +   add-br br1 -- \
> +   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
> +   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
> +                  fail-mode=secure -- \
> +   add-port br1 p2 -- set Interface p2 type=patch \
> +                                       options:peer=p1])
> 
> AT_CHECK([ovs-appctl dpif/show], [0], [dnl
> dummy@ovs-dummy: hit:0 missed:0
> 	br0:
> 		br0 65534/100: (dummy)
> 		p0 1/1: (dummy)
> +		p1 2/none: (patch: peer=p2)
> +	br1:
> +		br1 65534/101: (dummy)
> +		p2 1/none: (patch: peer=p1)
> ])
> 
> dnl Setup single MPLS tags.
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 group_id=1232,type=select,bucket=output:LOCAL])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 group_id=1233,type=all,bucket=output:LOCAL])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 group_id=1234,type=all,bucket=dec_ttl,output:LOCAL])
> AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 in_port=local,dl_type=0x0800,action=push_mpls:0x8847,set_field:10-\>mpls_label,output:1])
> AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=20,action=pop_mpls:0x0800,output:LOCAL])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=21,action=pop_mpls:0x0800,dec_ttl,output:LOCAL])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=22,action=pop_mpls:0x0800,group:1232])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=23,action=pop_mpls:0x0800,group:1233])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=24,action=pop_mpls:0x0800,group:1234])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=25,action=pop_mpls:0x0800,output:2])
> +
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 in_port=1,action=output:LOCAL])
> +
> +
> 
> dnl Test MPLS push
> AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=7777,dst=80)'], [0], [stdout])
> @@ -21,17 +46,62 @@ AT_CHECK([tail -1 stdout], [0],
>   [Datapath actions: push_mpls(label=10,tc=0,ttl=64,bos=1,eth_type=0x8847),1
> ])
> 
> -dnl Test MPLS pop
> +dnl Test MPLS pop then output (actions do not trigger reciculation)
> AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=20,tc=0,ttl=64,bos=1)'], [0], [stdout])
> AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: pop_mpls(eth_type=0x800),100
> +])
> +
> +dnl Test MPLS pop, dec_ttl, output (actions trigger recirculation)
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=21,tc=0,ttl=64,bos=1)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
>   [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x1)
> ])
> 
> AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(1),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
> AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: set(ipv4(ttl=63)),100
> +])
> +
> +dnl Test MPLS pop then select group output (group type triggers recirculation)
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=22,tc=0,ttl=64,bos=1)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x2)
> +])
> +
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
>   [Datapath actions: 100
> ])
> 
> +dnl Test MPLS pop then all group output (bucket actions do not trigger recirculation)
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=23,tc=0,ttl=64,bos=1)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: pop_mpls(eth_type=0x800),100
> +])
> +
> +dnl Test MPLS pop then all group output (bucket actions trigger recirculation)
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=24,tc=0,ttl=64,bos=1)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x3)
> +])
> +
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(3),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: set(ipv4(ttl=63)),100
> +])
> +
> +dnl Test MPLS pop then all output to patch port
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=25,tc=0,ttl=64,bos=1)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x4)
> +])
> +
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(4),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
> +AT_CHECK([tail -1 stdout], [0],
> +  [Datapath actions: 101
> +])
> +
> dnl Setup multiple MPLS tags.
> AT_CHECK([ovs-ofctl del-flows br0])
> 
> @@ -52,10 +122,10 @@ AT_CHECK([tail -1 stdout], [0],
> dnl Double MPLS pop
> AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=60,tc=0,ttl=64,bos=0,label=50,tc=0,ttl=64,bos=1)'], [0], [stdout])
> AT_CHECK([tail -1 stdout], [0],
> -  [Datapath actions: pop_mpls(eth_type=0x8847),pop_mpls(eth_type=0x800),recirc(0x2)
> +  [Datapath actions: pop_mpls(eth_type=0x8847),pop_mpls(eth_type=0x800),recirc(0x5)
> ])
> 
> -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
> +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(5),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
> AT_CHECK([tail -1 stdout], [0],
>   [Datapath actions: set(ipv4(ttl=10)),100
> ])
> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> index 18e5e22265a5..835028123e87 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -6415,8 +6415,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
> sleep 1
> AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout_keep_actions], [0], [dnl
> recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,mpls_label=11,mpls_tc=3,mpls_ttl=64,mpls_bos=1, actions:push_mpls(label=11,tc=3,ttl=64,bos=0,eth_type=0x8847),2
> -recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,mpls_bos=1, actions:pop_mpls(eth_type=0x800),recirc(0x1)
> -recirc_id=0x1,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, actions:2
> +recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,mpls_bos=1, actions:pop_mpls(eth_type=0x800),2
> ])
> OVS_VSWITCHD_STOP
> AT_CLEANUP
> -- 
> 2.1.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
Simon Horman May 25, 2016, 1:33 a.m. UTC | #2
On Tue, May 24, 2016 at 12:36:02PM -0700, Jarno Rajahalme wrote:
> One comment below, otherwise looks good,
> 
> Acked-by: Jarno Rajahalme <jarno@ovn.org>

[...]

> > On May 24, 2016, at 12:29 AM, Simon Horman <simon.horman@netronome.com> wrote:
> > 
> > Until 8bf009bf8ab4 ("xlate: Always recirculate after an MPLS POP to a
> > non-MPLS ethertype.") the translation code took some care to only
> > recirculate as a result of a pop_mpls action if necessary. This was
> > implemented using per-action checks and resulted in some maintenance
> > burden.
> > 
> > Unfortunately recirculation is a relatively expensive operation and a
> > performance degradation of up to 35% has been observed with the above
> > mentioned patch applied for the arguably common case of:
> > 
> > 	pop_mpls,set(l2 field),output
> > 
> > This patch attempts to strike a balance between performance and
> > maintainability by special casing set and output actions such
> > that recirculation may be avoided.
> > 
> > This partially reverts the above mentioned commit. In particular most
> > of the C code outside of do_xlate_actions().
> > 
> > Signed-off-by: Simon Horman <simon.horman@netronome.com>

[...]

> > @@ -4500,6 +4612,8 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
> >             break;
> >         }
> > 
> > +        recirc_for_mpls(a, ctx);
> > +
> 
> IMO this should not be done when 'ctx->exit' has already been set, as it may trigger an unnecessary recirculation in that case.

Thanks, good point.

I will make recirc_for_mpls return early if ctx->exit is already set.
diff mbox

Patch

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index af26c64fcc39..025333ba56eb 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -349,6 +349,12 @@  struct xlate_ctx {
     struct ofpbuf frozen_actions;
     const struct ofpact_controller *pause;
 
+    /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
+     * This is a trigger for recirculation in cases where translating an action
+     * or looking up a flow requires access to the fields of the packet after
+     * the MPLS label stack that was originally present. */
+    bool was_mpls;
+
     /* True if conntrack has been performed on this packet during processing
      * on the current bridge. This is used to determine whether conntrack
      * state from the datapath should be honored after thawing. */
@@ -3029,6 +3035,7 @@  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         const struct xport *peer = xport->peer;
         struct flow old_flow = ctx->xin->flow;
         bool old_conntrack = ctx->conntracked;
+        bool old_was_mpls = ctx->was_mpls;
         cls_version_t old_version = ctx->tables_version;
         struct ofpbuf old_stack = ctx->stack;
         union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
@@ -3086,6 +3093,10 @@  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
         /* Restore calling bridge's lookup version. */
         ctx->tables_version = old_version;
 
+        /* The peer bridge popping MPLS should have no effect on the original
+         * bridge. */
+        ctx->was_mpls = old_was_mpls;
+
         /* The peer bridge's conntrack execution should have no effect on the
          * original bridge. */
         ctx->conntracked = old_conntrack;
@@ -3294,6 +3305,11 @@  static void
 xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
                    bool may_packet_in, bool honor_table_miss)
 {
+    /* Check if we need to recirculate before matching in a table. */
+    if (ctx->was_mpls) {
+        ctx_trigger_freeze(ctx);
+        return;
+    }
     if (xlate_resubmit_resource_check(ctx)) {
         uint8_t old_table_id = ctx->table_id;
         struct rule_dpif *rule;
@@ -3355,6 +3371,7 @@  xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
     struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
                                                         bucket->ofpacts_len);
     struct flow old_flow = ctx->xin->flow;
+    bool old_was_mpls = ctx->was_mpls;
 
     ofpacts_execute_action_set(&action_list, &action_set);
     ctx->indentation++;
@@ -3386,6 +3403,10 @@  xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
      * group buckets. */
     ctx->xin->flow = old_flow;
 
+    /* The group bucket popping MPLS should have no effect after bucket
+     * execution. */
+    ctx->was_mpls = old_was_mpls;
+
     /* The fact that the group bucket exits (for any reason) does not mean that
      * the translation after the group action should exit.  Specifically, if
      * the group bucket freezes translation, the actions after the group action
@@ -3509,6 +3530,13 @@  xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
 {
     const char *selection_method = group_dpif_get_selection_method(group);
 
+    /* Select groups may access flow keys beyond L2 in order to
+     * select a bucket. Recirculate as appropriate to make this possible.
+     */
+    if (ctx->was_mpls) {
+        ctx_trigger_freeze(ctx);
+    }
+
     if (selection_method[0] == '\0') {
         xlate_default_select_group(ctx, group);
     } else if (!strcasecmp("hash", selection_method)) {
@@ -3802,7 +3830,7 @@  compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
 
     if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
         if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
-            ctx_trigger_freeze(ctx);
+            ctx->was_mpls = true;
         }
     } else if (n >= FLOW_MAX_MPLS_LABELS) {
         if (ctx->xin->packet != NULL) {
@@ -4478,6 +4506,90 @@  compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
 }
 
 static void
+recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
+{
+    /* Do not consider recirculating unless the packet was previously MPLS. */
+    if (!ctx->was_mpls) {
+        return;
+    }
+
+    /* Special case these actions, only recirculating if necessary.
+     * This avoids the overhead of recirculation in common use-cases.
+     */
+    switch (a->type) {
+
+    /* Output actions  do not require recirculation. */
+    case OFPACT_OUTPUT:
+    case OFPACT_ENQUEUE:
+    case OFPACT_OUTPUT_REG:
+    /* Set actions that don't touch L3+ fields do not require recirculation. */
+    case OFPACT_SET_VLAN_VID:
+    case OFPACT_SET_VLAN_PCP:
+    case OFPACT_SET_ETH_SRC:
+    case OFPACT_SET_ETH_DST:
+    case OFPACT_SET_TUNNEL:
+    case OFPACT_SET_QUEUE:
+    /* If actions of a group require recirculation that can be detected
+     * when translating them. */
+    case OFPACT_GROUP:
+        return;
+
+    /* Set field that don't touch L3+ fields don't require recirculation. */
+    case OFPACT_SET_FIELD:
+        if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) {
+            break;
+        }
+        return;
+
+    /* For simplicity, recirculate in all other cases. */
+    case OFPACT_CONTROLLER:
+    case OFPACT_BUNDLE:
+    case OFPACT_STRIP_VLAN:
+    case OFPACT_PUSH_VLAN:
+    case OFPACT_SET_IPV4_SRC:
+    case OFPACT_SET_IPV4_DST:
+    case OFPACT_SET_IP_DSCP:
+    case OFPACT_SET_IP_ECN:
+    case OFPACT_SET_IP_TTL:
+    case OFPACT_SET_L4_SRC_PORT:
+    case OFPACT_SET_L4_DST_PORT:
+    case OFPACT_REG_MOVE:
+    case OFPACT_STACK_PUSH:
+    case OFPACT_STACK_POP:
+    case OFPACT_DEC_TTL:
+    case OFPACT_SET_MPLS_LABEL:
+    case OFPACT_SET_MPLS_TC:
+    case OFPACT_SET_MPLS_TTL:
+    case OFPACT_DEC_MPLS_TTL:
+    case OFPACT_PUSH_MPLS:
+    case OFPACT_POP_MPLS:
+    case OFPACT_POP_QUEUE:
+    case OFPACT_FIN_TIMEOUT:
+    case OFPACT_RESUBMIT:
+    case OFPACT_LEARN:
+    case OFPACT_CONJUNCTION:
+    case OFPACT_MULTIPATH:
+    case OFPACT_NOTE:
+    case OFPACT_EXIT:
+    case OFPACT_SAMPLE:
+    case OFPACT_UNROLL_XLATE:
+    case OFPACT_CT:
+    case OFPACT_NAT:
+    case OFPACT_DEBUG_RECIRC:
+    case OFPACT_METER:
+    case OFPACT_CLEAR_ACTIONS:
+    case OFPACT_WRITE_ACTIONS:
+    case OFPACT_WRITE_METADATA:
+    case OFPACT_GOTO_TABLE:
+    default:
+        break;
+    }
+
+    /* Recirculate */
+    ctx_trigger_freeze(ctx);
+}
+
+static void
 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                  struct xlate_ctx *ctx)
 {
@@ -4500,6 +4612,8 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
         }
 
+        recirc_for_mpls(a, ctx);
+
         if (ctx->exit) {
             /* Check if need to store the remaining actions for later
              * execution. */
@@ -5151,6 +5265,7 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
         .pause = NULL,
 
+        .was_mpls = false,
         .conntracked = false,
 
         .ct_nat_action = NULL,
diff --git a/tests/mpls-xlate.at b/tests/mpls-xlate.at
index 7df52f513e7e..a9a3cf53bed3 100644
--- a/tests/mpls-xlate.at
+++ b/tests/mpls-xlate.at
@@ -2,18 +2,43 @@  AT_BANNER([mpls-xlate])
 
 AT_SETUP([MPLS xlate action])
 
-OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1])
+OVS_VSWITCHD_START(
+  [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \
+   add-port br0 p1 -- set Interface p1 type=patch \
+                                       options:peer=p2 ofport_request=2 -- \
+   add-br br1 -- \
+   set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+   set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+                  fail-mode=secure -- \
+   add-port br1 p2 -- set Interface p2 type=patch \
+                                       options:peer=p1])
 
 AT_CHECK([ovs-appctl dpif/show], [0], [dnl
 dummy@ovs-dummy: hit:0 missed:0
 	br0:
 		br0 65534/100: (dummy)
 		p0 1/1: (dummy)
+		p1 2/none: (patch: peer=p2)
+	br1:
+		br1 65534/101: (dummy)
+		p2 1/none: (patch: peer=p1)
 ])
 
 dnl Setup single MPLS tags.
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 group_id=1232,type=select,bucket=output:LOCAL])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 group_id=1233,type=all,bucket=output:LOCAL])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 group_id=1234,type=all,bucket=dec_ttl,output:LOCAL])
 AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 in_port=local,dl_type=0x0800,action=push_mpls:0x8847,set_field:10-\>mpls_label,output:1])
 AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=20,action=pop_mpls:0x0800,output:LOCAL])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=21,action=pop_mpls:0x0800,dec_ttl,output:LOCAL])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=22,action=pop_mpls:0x0800,group:1232])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=23,action=pop_mpls:0x0800,group:1233])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=24,action=pop_mpls:0x0800,group:1234])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 dl_type=0x8847,in_port=1,mpls_label=25,action=pop_mpls:0x0800,output:2])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 in_port=1,action=output:LOCAL])
+
+
 
 dnl Test MPLS push
 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=7777,dst=80)'], [0], [stdout])
@@ -21,17 +46,62 @@  AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: push_mpls(label=10,tc=0,ttl=64,bos=1,eth_type=0x8847),1
 ])
 
-dnl Test MPLS pop
+dnl Test MPLS pop then output (actions do not trigger reciculation)
 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=20,tc=0,ttl=64,bos=1)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: pop_mpls(eth_type=0x800),100
+])
+
+dnl Test MPLS pop, dec_ttl, output (actions trigger recirculation)
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=21,tc=0,ttl=64,bos=1)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x1)
 ])
 
 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(1),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4(ttl=63)),100
+])
+
+dnl Test MPLS pop then select group output (group type triggers recirculation)
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=22,tc=0,ttl=64,bos=1)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x2)
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: 100
 ])
 
+dnl Test MPLS pop then all group output (bucket actions do not trigger recirculation)
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=23,tc=0,ttl=64,bos=1)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: pop_mpls(eth_type=0x800),100
+])
+
+dnl Test MPLS pop then all group output (bucket actions trigger recirculation)
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=24,tc=0,ttl=64,bos=1)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x3)
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(3),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(ipv4(ttl=63)),100
+])
+
+dnl Test MPLS pop then all output to patch port
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=25,tc=0,ttl=64,bos=1)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x4)
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(4),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: 101
+])
+
 dnl Setup multiple MPLS tags.
 AT_CHECK([ovs-ofctl del-flows br0])
 
@@ -52,10 +122,10 @@  AT_CHECK([tail -1 stdout], [0],
 dnl Double MPLS pop
 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=60,tc=0,ttl=64,bos=0,label=50,tc=0,ttl=64,bos=1)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
-  [Datapath actions: pop_mpls(eth_type=0x8847),pop_mpls(eth_type=0x800),recirc(0x2)
+  [Datapath actions: pop_mpls(eth_type=0x8847),pop_mpls(eth_type=0x800),recirc(0x5)
 ])
 
-AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(5),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
 AT_CHECK([tail -1 stdout], [0],
   [Datapath actions: set(ipv4(ttl=10)),100
 ])
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 18e5e22265a5..835028123e87 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -6415,8 +6415,7 @@  AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:
 sleep 1
 AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout_keep_actions], [0], [dnl
 recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,mpls_label=11,mpls_tc=3,mpls_ttl=64,mpls_bos=1, actions:push_mpls(label=11,tc=3,ttl=64,bos=0,eth_type=0x8847),2
-recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,mpls_bos=1, actions:pop_mpls(eth_type=0x800),recirc(0x1)
-recirc_id=0x1,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, actions:2
+recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,mpls_bos=1, actions:pop_mpls(eth_type=0x800),2
 ])
 OVS_VSWITCHD_STOP
 AT_CLEANUP