diff mbox

[ovs-dev,RFC,v2,02/13] Format NSH keys to readable strings

Message ID 1468344394-11176-1-git-send-email-johnson.li@intel.com
State Changes Requested
Headers show

Commit Message

Johnson.Li July 12, 2016, 5:26 p.m. UTC
Signed-off-by: Johnson Li <johnson.li@intel.com>

Comments

Simon Horman July 18, 2016, 6:15 a.m. UTC | #1
Hi Johnson,

On Wed, Jul 13, 2016 at 01:28:48AM +0800, Johnson Li wrote:
> Signed-off-by: Johnson Li <johnson.li@intel.com>

* Regarding the action set (which we discussed briefly off-list):

  I think that you need to update ofpacts_execute_action_set(), though
  possibly not in this patch, for push/pop_nsh to be usable in write
  actions. However, its not clear to me at which position of that function
  push/pop_nsh because as far as I know this has not been defined by
  OpenFlow.

* Regarding the implementation of push/pop_nsh in this patch:

  In general translation occurs in two phases in OvS.

  1. Composition: This generally involves updating fields of
     ctx->xin->flow to new desired values and ctx->wc to indicate
     which fields have been accessed so that masks for megaflows
     can be calculated correctly.

     For simple cases such as OFPACT_SET_IP_TTL this is open-coded
     in do_xlate_actions. For more complex cases helpers are provided,
     e.g.  OFPACT_PUSH_MPLS (though that is not very complex)

  2. Commit: Here differences between ctx->in->flow and ctx->base_flow,
     which are the same before translation starts, are compared. And any
     differences are resolved by writing actions to ctx->odp_actions.
     base_flow is then reset for cases where translation continues.

     This is performed by commit_odp_actions(), e.g. when called via
     xlate_commit_actions().

There are exceptions to the above and in some cases actions are written
directly to ctx->odp_actions, but I'm not sure that push/pop_nsh needs to
be such an exception.

> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 90edb56..1a63175 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -5072,8 +5072,58 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
>              ctx_trigger_freeze(ctx);
>              a = ofpact_next(a);
>              break;
> -        case OFPACT_PUSH_NSH:
> +        case OFPACT_PUSH_NSH: {
> +            struct ovs_action_push_nsh push;
> +            struct nsh_header *nsh = (struct nsh_header *)push.header;

I think OvS prefers, though admittedly does not strictly follow,
the reverse christmas-tree (longest to shortest line) ordering of
variables. So the above two lines should be inverted.

> +            //int md_len = 0;
> +            //bool crit_opt;

Please remove commented out code.

> +
> +            if (flow->nsh.md_type == NSH_MD_TYPE1) {
> +                struct nsh_md1_ctx *ctx = NULL;
> +
> +                nsh->base.flags = flow->nsh.flags;
> +                nsh->base.length = NSH_TYPE1_LEN >> 2;
> +                nsh->base.md_type = NSH_MD_TYPE1;
> +                nsh->base.next_proto = flow->nsh.next_proto;
> +                nsh->base.sfp = (flow->nsh.nsp >> 8) | (flow->nsh.nsi << 24);
> +
> +                ctx = (struct nsh_md1_ctx *)(nsh + 1);
> +                ctx->nshc1 = flow->nsh.nshc1;
> +                ctx->nshc2 = flow->nsh.nshc2;
> +                ctx->nshc3 = flow->nsh.nshc3;
> +                ctx->nshc4 = flow->nsh.nshc4;
> +
> +                push.len = NSH_TYPE1_LEN;
> +#if 0

Please remove #if 0. If the enclosed code is not as ready as the rest of
the patch please remove it too.

> +            } else if (flow->nsh.md_type == NSH_MD_TYPE2) {
> +                /* MD type 2 prototype with TUN_METADATA APIs. */
> +                struct nsh_md2_ctx *ctx = NULL;
> +
> +                nsh->base.md_type = NSH_MD_TYPE2;
> +                nsh->base.next_proto = flow->nsh.next_proto;
> +                nsh->base.sfp = (flow->nsh.nsp >> 8) | (flow->nsh.nsi << 24);
> +
> +                ctx = (struct nsh_md2_ctx *)(nsh + 1);
> +                md_len = tun_metadata_to_geneve_header(&flow->tunnel,
> +                                                       (struct geneve_opt *)ctx,
> +                                                       &crit_opt);

Perhaps it would be worth considering renaming this and other related
'geneve_' functions if they are being used beyond Geneve.

> +                nsh->base.length = (sizeof(struct nsh_header) + md_len) >> 2;
> +                push.len = md_len + sizeof(struct nsh_header);
> +#endif
> +            }

Are we sure no other value of flow->nsh.md_type can be present?

> +
> +            nl_msg_put_unspec(ctx->odp_actions, OVS_ACTION_ATTR_PUSH_NSH,
> +                              &push, sizeof push);
> +
> +            flow->dl_type = htons(ETH_TYPE_NSH);
> +            memset(&wc->masks.nsh.md_type, 0xff, sizeof wc->masks.nsh.md_type);
> +            break;
> +        }
> +
>          case OFPACT_POP_NSH:
> +            memset(&wc->masks.nsh.md_type, 0xff, sizeof wc->masks.nsh.md_type);
> +            memset(&flow->nsh, 0x0, sizeof flow->nsh);
> +            nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_NSH);
>              break;
>          }
Yang, Yi July 18, 2016, 10:37 a.m. UTC | #2
On Mon, Jul 18, 2016 at 03:15:13PM +0900, Simon Horman wrote:
> Hi Johnson,
> 
> On Wed, Jul 13, 2016 at 01:28:48AM +0800, Johnson Li wrote:
> > Signed-off-by: Johnson Li <johnson.li@intel.com>
> 
> * Regarding the action set (which we discussed briefly off-list):
> 
>   I think that you need to update ofpacts_execute_action_set(), though
>   possibly not in this patch, for push/pop_nsh to be usable in write
>   actions. However, its not clear to me at which position of that function
>   push/pop_nsh because as far as I know this has not been defined by
>   OpenFlow.
> 
> * Regarding the implementation of push/pop_nsh in this patch:
> 
>   In general translation occurs in two phases in OvS.
> 
>   1. Composition: This generally involves updating fields of
>      ctx->xin->flow to new desired values and ctx->wc to indicate
>      which fields have been accessed so that masks for megaflows
>      can be calculated correctly.
> 
>      For simple cases such as OFPACT_SET_IP_TTL this is open-coded
>      in do_xlate_actions. For more complex cases helpers are provided,
>      e.g.  OFPACT_PUSH_MPLS (though that is not very complex)
> 
>   2. Commit: Here differences between ctx->in->flow and ctx->base_flow,
>      which are the same before translation starts, are compared. And any
>      differences are resolved by writing actions to ctx->odp_actions.
>      base_flow is then reset for cases where translation continues.
> 
>      This is performed by commit_odp_actions(), e.g. when called via
>      xlate_commit_actions().
> 
> There are exceptions to the above and in some cases actions are written
> directly to ctx->odp_actions, but I'm not sure that push/pop_nsh needs to
> be such an exception.

Simon, very good guide, do push_eth and pop_eth also need to follow
this?

> 
> > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> > index 90edb56..1a63175 100644
> > --- a/ofproto/ofproto-dpif-xlate.c
> > +++ b/ofproto/ofproto-dpif-xlate.c
> > @@ -5072,8 +5072,58 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
> >              ctx_trigger_freeze(ctx);
> >              a = ofpact_next(a);
> >              break;
> > -        case OFPACT_PUSH_NSH:
> > +        case OFPACT_PUSH_NSH: {
> > +            struct ovs_action_push_nsh push;
> > +            struct nsh_header *nsh = (struct nsh_header *)push.header;
> 
> I think OvS prefers, though admittedly does not strictly follow,
> the reverse christmas-tree (longest to shortest line) ordering of
> variables. So the above two lines should be inverted.
> 
> > +            //int md_len = 0;
> > +            //bool crit_opt;
> 
> Please remove commented out code.
> 
> > +
> > +            if (flow->nsh.md_type == NSH_MD_TYPE1) {
> > +                struct nsh_md1_ctx *ctx = NULL;
> > +
> > +                nsh->base.flags = flow->nsh.flags;
> > +                nsh->base.length = NSH_TYPE1_LEN >> 2;
> > +                nsh->base.md_type = NSH_MD_TYPE1;
> > +                nsh->base.next_proto = flow->nsh.next_proto;
> > +                nsh->base.sfp = (flow->nsh.nsp >> 8) | (flow->nsh.nsi << 24);
> > +
> > +                ctx = (struct nsh_md1_ctx *)(nsh + 1);
> > +                ctx->nshc1 = flow->nsh.nshc1;
> > +                ctx->nshc2 = flow->nsh.nshc2;
> > +                ctx->nshc3 = flow->nsh.nshc3;
> > +                ctx->nshc4 = flow->nsh.nshc4;
> > +
> > +                push.len = NSH_TYPE1_LEN;
> > +#if 0
> 
> Please remove #if 0. If the enclosed code is not as ready as the rest of
> the patch please remove it too.
> 
> > +            } else if (flow->nsh.md_type == NSH_MD_TYPE2) {
> > +                /* MD type 2 prototype with TUN_METADATA APIs. */
> > +                struct nsh_md2_ctx *ctx = NULL;
> > +
> > +                nsh->base.md_type = NSH_MD_TYPE2;
> > +                nsh->base.next_proto = flow->nsh.next_proto;
> > +                nsh->base.sfp = (flow->nsh.nsp >> 8) | (flow->nsh.nsi << 24);
> > +
> > +                ctx = (struct nsh_md2_ctx *)(nsh + 1);
> > +                md_len = tun_metadata_to_geneve_header(&flow->tunnel,
> > +                                                       (struct geneve_opt *)ctx,
> > +                                                       &crit_opt);
> 
> Perhaps it would be worth considering renaming this and other related
> 'geneve_' functions if they are being used beyond Geneve.
> 
> > +                nsh->base.length = (sizeof(struct nsh_header) + md_len) >> 2;
> > +                push.len = md_len + sizeof(struct nsh_header);
> > +#endif
> > +            }
> 
> Are we sure no other value of flow->nsh.md_type can be present?
> 
> > +
> > +            nl_msg_put_unspec(ctx->odp_actions, OVS_ACTION_ATTR_PUSH_NSH,
> > +                              &push, sizeof push);
> > +
> > +            flow->dl_type = htons(ETH_TYPE_NSH);
> > +            memset(&wc->masks.nsh.md_type, 0xff, sizeof wc->masks.nsh.md_type);
> > +            break;
> > +        }
> > +
> >          case OFPACT_POP_NSH:
> > +            memset(&wc->masks.nsh.md_type, 0xff, sizeof wc->masks.nsh.md_type);
> > +            memset(&flow->nsh, 0x0, sizeof flow->nsh);
> > +            nl_msg_put_flag(ctx->odp_actions, OVS_ACTION_ATTR_POP_NSH);
> >              break;
> >          }
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
Simon Horman July 18, 2016, 12:13 p.m. UTC | #3
On Mon, Jul 18, 2016 at 06:37:04PM +0800, Yang, Yi wrote:
> On Mon, Jul 18, 2016 at 03:15:13PM +0900, Simon Horman wrote:
> > Hi Johnson,
> > 
> > On Wed, Jul 13, 2016 at 01:28:48AM +0800, Johnson Li wrote:
> > > Signed-off-by: Johnson Li <johnson.li@intel.com>
> > 
> > * Regarding the action set (which we discussed briefly off-list):
> > 
> >   I think that you need to update ofpacts_execute_action_set(), though
> >   possibly not in this patch, for push/pop_nsh to be usable in write
> >   actions. However, its not clear to me at which position of that function
> >   push/pop_nsh because as far as I know this has not been defined by
> >   OpenFlow.
> > 
> > * Regarding the implementation of push/pop_nsh in this patch:
> > 
> >   In general translation occurs in two phases in OvS.
> > 
> >   1. Composition: This generally involves updating fields of
> >      ctx->xin->flow to new desired values and ctx->wc to indicate
> >      which fields have been accessed so that masks for megaflows
> >      can be calculated correctly.
> > 
> >      For simple cases such as OFPACT_SET_IP_TTL this is open-coded
> >      in do_xlate_actions. For more complex cases helpers are provided,
> >      e.g.  OFPACT_PUSH_MPLS (though that is not very complex)
> > 
> >   2. Commit: Here differences between ctx->in->flow and ctx->base_flow,
> >      which are the same before translation starts, are compared. And any
> >      differences are resolved by writing actions to ctx->odp_actions.
> >      base_flow is then reset for cases where translation continues.
> > 
> >      This is performed by commit_odp_actions(), e.g. when called via
> >      xlate_commit_actions().
> > 
> > There are exceptions to the above and in some cases actions are written
> > directly to ctx->odp_actions, but I'm not sure that push/pop_nsh needs to
> > be such an exception.
> 
> Simon, very good guide, do push_eth and pop_eth also need to follow
> this?

Not exactly.

As of v12 of the patch-set there are two phases, compose and commit.
This was not the case in previous versions and was the source of some bugs.

The composition phase is not done directly in do_xlate_actions(). Rather it
is done as required in compose_output_action__() and
execute_controller_action() by setting flow->base_layer. This implements
automatic placement of push/pop_eth actions as they are required for
output.

This can be seen in "[PATCH v12 2/3] userspace: add layer 3 flow and
switching support"

The commit phase is provided by commit_ether_action() which also handles
modification of ethernet addresses other than by push/pop_eth.

This can be seen in "PATCH v12 1/3] userspace: add support for pop_eth and
push_eth action"

[snip]
Jan Scheurich July 18, 2016, 8:20 p.m. UTC | #4
> From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Simon Horman

> Sent: Monday, 18 July, 2016 14:14

> >

> > Simon, very good guide, do push_eth and pop_eth also need to follow

> > this?

> 

> Not exactly.

> 

> As of v12 of the patch-set there are two phases, compose and commit.

> This was not the case in previous versions and was the source of some bugs.

> 

> The composition phase is not done directly in do_xlate_actions(). Rather it is

> done as required in compose_output_action__() and

> execute_controller_action() by setting flow->base_layer. This implements

> automatic placement of push/pop_eth actions as they are required for

> output.


Simon, the difference to your patch set is that here the controller sends an explicit push_eth action in order to push an Ethernet header and then set the MAC addresses before transmission to a L2 port.

As far as I can judge, this requires triggering a commit in the push_eth composition phase, so that subsequent set_field(dl_src/dst) actions set the new outer ethernet header rather than the inner header. (Compare this to  compose_mpls_push_action()).

> 

> This can be seen in "[PATCH v12 2/3] userspace: add layer 3 flow and

> switching support"

> 

> The commit phase is provided by commit_ether_action() which also handles

> modification of ethernet addresses other than by push/pop_eth.

> 

> This can be seen in "PATCH v12 1/3] userspace: add support for pop_eth and

> push_eth action"
Jan Scheurich July 18, 2016, 11:21 p.m. UTC | #5
Hi Johnson,

> From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Yang, Yi

> Sent: Monday, 18 July, 2016 12:37

> 

> On Mon, Jul 18, 2016 at 03:15:13PM +0900, Simon Horman wrote:

> > Hi Johnson,

> >

> > On Wed, Jul 13, 2016 at 01:28:48AM +0800, Johnson Li wrote:

> > > Signed-off-by: Johnson Li <johnson.li@intel.com>

> >

> > * Regarding the action set (which we discussed briefly off-list):

> >

> >   I think that you need to update ofpacts_execute_action_set(), though

> >   possibly not in this patch, for push/pop_nsh to be usable in write

> >   actions. However, its not clear to me at which position of that function

> >   push/pop_nsh because as far as I know this has not been defined by

> >   OpenFlow.


The OF standard specifies the following order for action set execution:
1. copy TTL inwards: apply copy TTL inward actions to the packet
2. pop: apply all tag pop actions to the packet
3. push-MPLS: apply MPLS tag push action to the packet
4. push-PBB: apply PBB tag push action to the packet
5. push-VLAN: apply VLAN tag push action to the packet
6. copy TTL outwards: apply copy TTL outwards action to the packet
7. decrement TTL: apply decrement TTL action to the packet
8. set: apply all set-field actions to the packet
9. qos: apply all QoS actions, such as set queue to the packet
10. group: if a group action is specified, apply the actions of the relevant group bucket(s) in the order specified by this list
11. output: if no group action is specified, forward the packet on the port specified by the output action

If both push_nsh and push_eth are in the action set, push_nsh should be executed before push_eth. Furthermore, if push_mpls and push_eth are present in the action set, push_eth should be executed before push_mpls (push_mpls pushes the MPLS shim header between MAC header and L2 payload, so the resulting encap would be Outer Ethernet, MPLS, Inner Ethernet, which may be useful for Ethernet over MPLS).

My suggestion for the action set execution order would therefore be:
3.a	push_nsh
3.b 	push_eth
3.c	push_mpls
4	push_pbb
5	push_vlan

I agree that this is somewhat arbitrary and can be debated, but one can achieve any wanted order using the Apply Actions instruction.

> >

> > * Regarding the implementation of push/pop_nsh in this patch:

> >

> >   In general translation occurs in two phases in OvS.

> >

> >   1. Composition: This generally involves updating fields of

> >      ctx->xin->flow to new desired values and ctx->wc to indicate

> >      which fields have been accessed so that masks for megaflows

> >      can be calculated correctly.

> >

> >      For simple cases such as OFPACT_SET_IP_TTL this is open-coded

> >      in do_xlate_actions. For more complex cases helpers are provided,

> >      e.g.  OFPACT_PUSH_MPLS (though that is not very complex)

> >

> >   2. Commit: Here differences between ctx->in->flow and ctx->base_flow,

> >      which are the same before translation starts, are compared. And any

> >      differences are resolved by writing actions to ctx->odp_actions.

> >      base_flow is then reset for cases where translation continues.

> >

> >      This is performed by commit_odp_actions(), e.g. when called via

> >      xlate_commit_actions().

> >

> > There are exceptions to the above and in some cases actions are

> > written directly to ctx->odp_actions, but I'm not sure that

> > push/pop_nsh needs to be such an exception.


In addition to splitting up the push_nsh handling into compose and commit phase you need to consider the following:

As push_nsh pushes an encapsulation that changes the packet's ethertype and hides the inner packet, you need to call xlate_commit_actions(ctx) to really carry out all not yet committed actions on the inner packet before you start composing the push_nsh action.

Conversely, when you compose the pop_nsh action you need to trigger a recirculation of the packet after pop_nsh to have the datapath reparse the inner packet and subject it to another megaflow lookup. You should do that by calling  ctx_trigger_freeze(ctx) after having composed the pop_mpls action.

The same goes for the actions push_eth and pop_eth.

BR, Jan
Simon Horman Aug. 10, 2016, 10:08 a.m. UTC | #6
On Mon, Jul 18, 2016 at 08:20:42PM +0000, Jan Scheurich wrote:
> > From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Simon Horman
> > Sent: Monday, 18 July, 2016 14:14
> > >
> > > Simon, very good guide, do push_eth and pop_eth also need to follow
> > > this?
> > 
> > Not exactly.
> > 
> > As of v12 of the patch-set there are two phases, compose and commit.
> > This was not the case in previous versions and was the source of some bugs.
> > 
> > The composition phase is not done directly in do_xlate_actions(). Rather it is
> > done as required in compose_output_action__() and
> > execute_controller_action() by setting flow->base_layer. This implements
> > automatic placement of push/pop_eth actions as they are required for
> > output.
> 
> Simon, the difference to your patch set is that here the controller sends an explicit push_eth action in order to push an Ethernet header and then set the MAC addresses before transmission to a L2 port.
> 
> As far as I can judge, this requires triggering a commit in the push_eth composition phase, so that subsequent set_field(dl_src/dst) actions set the new outer ethernet header rather than the inner header. (Compare this to  compose_mpls_push_action()).

Yes, that is entirely correct.

> 
> > 
> > This can be seen in "[PATCH v12 2/3] userspace: add layer 3 flow and
> > switching support"
> > 
> > The commit phase is provided by commit_ether_action() which also handles
> > modification of ethernet addresses other than by push/pop_eth.
> > 
> > This can be seen in "PATCH v12 1/3] userspace: add support for pop_eth and
> > push_eth action"
> 
>
Simon Horman Aug. 10, 2016, 10:14 a.m. UTC | #7
On Mon, Jul 18, 2016 at 11:21:11PM +0000, Jan Scheurich wrote:
> Hi Johnson,
> 
> > From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Yang, Yi
> > Sent: Monday, 18 July, 2016 12:37
> > 
> > On Mon, Jul 18, 2016 at 03:15:13PM +0900, Simon Horman wrote:
> > > Hi Johnson,
> > >
> > > On Wed, Jul 13, 2016 at 01:28:48AM +0800, Johnson Li wrote:
> > > > Signed-off-by: Johnson Li <johnson.li@intel.com>
> > >
> > > * Regarding the action set (which we discussed briefly off-list):
> > >
> > >   I think that you need to update ofpacts_execute_action_set(), though
> > >   possibly not in this patch, for push/pop_nsh to be usable in write
> > >   actions. However, its not clear to me at which position of that function
> > >   push/pop_nsh because as far as I know this has not been defined by
> > >   OpenFlow.
> 
> The OF standard specifies the following order for action set execution:
> 1. copy TTL inwards: apply copy TTL inward actions to the packet
> 2. pop: apply all tag pop actions to the packet
> 3. push-MPLS: apply MPLS tag push action to the packet
> 4. push-PBB: apply PBB tag push action to the packet
> 5. push-VLAN: apply VLAN tag push action to the packet
> 6. copy TTL outwards: apply copy TTL outwards action to the packet
> 7. decrement TTL: apply decrement TTL action to the packet
> 8. set: apply all set-field actions to the packet
> 9. qos: apply all QoS actions, such as set queue to the packet
> 10. group: if a group action is specified, apply the actions of the relevant group bucket(s) in the order specified by this list
> 11. output: if no group action is specified, forward the packet on the port specified by the output action
> 
> If both push_nsh and push_eth are in the action set, push_nsh should be executed before push_eth. Furthermore, if push_mpls and push_eth are present in the action set, push_eth should be executed before push_mpls (push_mpls pushes the MPLS shim header between MAC header and L2 payload, so the resulting encap would be Outer Ethernet, MPLS, Inner Ethernet, which may be useful for Ethernet over MPLS).
> 
> My suggestion for the action set execution order would therefore be:
> 3.a	push_nsh
> 3.b 	push_eth
> 3.c	push_mpls
> 4	push_pbb
> 5	push_vlan
> 
> I agree that this is somewhat arbitrary and can be debated, but one can achieve any wanted order using the Apply Actions instruction.

I think that sounds reasonable.

Actually, I think the important thing is to define the ordering clearly.
I wonder if taking it to the ONF earlier than later would be wise. It will
be painful if at some point they chose a different ordering to what is
present in the wild.

> > > * Regarding the implementation of push/pop_nsh in this patch:
> > >
> > >   In general translation occurs in two phases in OvS.
> > >
> > >   1. Composition: This generally involves updating fields of
> > >      ctx->xin->flow to new desired values and ctx->wc to indicate
> > >      which fields have been accessed so that masks for megaflows
> > >      can be calculated correctly.
> > >
> > >      For simple cases such as OFPACT_SET_IP_TTL this is open-coded
> > >      in do_xlate_actions. For more complex cases helpers are provided,
> > >      e.g.  OFPACT_PUSH_MPLS (though that is not very complex)
> > >
> > >   2. Commit: Here differences between ctx->in->flow and ctx->base_flow,
> > >      which are the same before translation starts, are compared. And any
> > >      differences are resolved by writing actions to ctx->odp_actions.
> > >      base_flow is then reset for cases where translation continues.
> > >
> > >      This is performed by commit_odp_actions(), e.g. when called via
> > >      xlate_commit_actions().
> > >
> > > There are exceptions to the above and in some cases actions are
> > > written directly to ctx->odp_actions, but I'm not sure that
> > > push/pop_nsh needs to be such an exception.
> 
> In addition to splitting up the push_nsh handling into compose and commit phase you need to consider the following:
> 
> As push_nsh pushes an encapsulation that changes the packet's ethertype and hides the inner packet, you need to call xlate_commit_actions(ctx) to really carry out all not yet committed actions on the inner packet before you start composing the push_nsh action.
> 
> Conversely, when you compose the pop_nsh action you need to trigger a recirculation of the packet after pop_nsh to have the datapath reparse the inner packet and subject it to another megaflow lookup. You should do that by calling  ctx_trigger_freeze(ctx) after having composed the pop_mpls action.
> 
> The same goes for the actions push_eth and pop_eth.

Yes, I believe that is true except for the last statement.

I think that push/pop_nsh will likely follow a similar pattern to
push/pop_mpls as both sets of actions may change the ethernet type of a
packet.  But I think the case of push/pop_eth is a little different. The
way this is modelled is that those actions affect the presence of an
ethernet header (which is in itself a bit special) but not the type of the
packet.
Jan Scheurich Aug. 10, 2016, 12:21 p.m. UTC | #8
> -----Original Message-----
> From: Simon Horman [mailto:simon.horman@netronome.com]
> Sent: Wednesday, 10 August, 2016 12:14
> >
> > My suggestion for the action set execution order would therefore be:
> > 3.a	push_nsh
> > 3.b 	push_eth
> > 3.c	push_mpls
> > 4	push_pbb
> > 5	push_vlan
> >
> > I agree that this is somewhat arbitrary and can be debated, but one can
> achieve any wanted order using the Apply Actions instruction.
> 
> I think that sounds reasonable.
> 
> Actually, I think the important thing is to define the ordering clearly.
> I wonder if taking it to the ONF earlier than later would be wise. It will be
> painful if at some point they chose a different ordering to what is present in the
> wild.

That makes sense.
Does anyone have a reach into the ONF to propose this as OF extensions?

> >
> > Conversely, when you compose the pop_nsh action you need to trigger a
> recirculation of the packet after pop_nsh to have the datapath reparse the
> inner packet and subject it to another megaflow lookup. You should do that by
> calling  ctx_trigger_freeze(ctx) after having composed the pop_mpls action.
> >
> > The same goes for the actions push_eth and pop_eth.
> 
> Yes, I believe that is true except for the last statement.
> 
> I think that push/pop_nsh will likely follow a similar pattern to push/pop_mpls
> as both sets of actions may change the ethernet type of a packet.  But I think
> the case of push/pop_eth is a little different. The way this is modelled is that
> those actions affect the presence of an ethernet header (which is in itself a bit
> special) but not the type of the packet.

I agree with you if the push_eth action is executed on a non-ethernet packet. 

But we also should consider the case that push_eth is executed on an ethernet frame, effectively creating a raw MAC in MAC encapsulation. I suggested to use the ethertype 0x6658 (used e.g. as protocol type for Transparent Ethernet Bridging (Ethernet over GRE)) for this.

See also http://openvswitch.org/pipermail/dev/2016-July/075632.html

Similarly for pop_eth.
Simon Horman Aug. 10, 2016, 12:44 p.m. UTC | #9
Hi Jan,

On Wed, Aug 10, 2016 at 12:21:08PM +0000, Jan Scheurich wrote:
> > -----Original Message-----
> > From: Simon Horman [mailto:simon.horman@netronome.com]
> > Sent: Wednesday, 10 August, 2016 12:14
> > >
> > > My suggestion for the action set execution order would therefore be:
> > > 3.a	push_nsh
> > > 3.b 	push_eth
> > > 3.c	push_mpls
> > > 4	push_pbb
> > > 5	push_vlan
> > >
> > > I agree that this is somewhat arbitrary and can be debated, but one can
> > achieve any wanted order using the Apply Actions instruction.
> > 
> > I think that sounds reasonable.
> > 
> > Actually, I think the important thing is to define the ordering clearly.
> > I wonder if taking it to the ONF earlier than later would be wise. It will be
> > painful if at some point they chose a different ordering to what is present in the
> > wild.
> 
> That makes sense.
> Does anyone have a reach into the ONF to propose this as OF extensions?

I investigate this if none else is so inclined.

> > > Conversely, when you compose the pop_nsh action you need to trigger a
> > recirculation of the packet after pop_nsh to have the datapath reparse the
> > inner packet and subject it to another megaflow lookup. You should do that by
> > calling  ctx_trigger_freeze(ctx) after having composed the pop_mpls action.
> > >
> > > The same goes for the actions push_eth and pop_eth.
> > 
> > Yes, I believe that is true except for the last statement.
> > 
> > I think that push/pop_nsh will likely follow a similar pattern to push/pop_mpls
> > as both sets of actions may change the ethernet type of a packet.  But I think
> > the case of push/pop_eth is a little different. The way this is modelled is that
> > those actions affect the presence of an ethernet header (which is in itself a bit
> > special) but not the type of the packet.
> 
> I agree with you if the push_eth action is executed on a non-ethernet packet. 
> 
> But we also should consider the case that push_eth is executed on an ethernet frame, effectively creating a raw MAC in MAC encapsulation. I suggested to use the ethertype 0x6658 (used e.g. as protocol type for Transparent Ethernet Bridging (Ethernet over GRE)) for this.
> 
> See also http://openvswitch.org/pipermail/dev/2016-July/075632.html
> 
> Similarly for pop_eth.

I think that makes sense but we also need to keep track of the type of the
Ethernet headerless packet so we know how to intemperate its headers.

The scheme that has evolved (and still evolved) during review of the
kernel datapath push/pop_eth changes is basically for skb->protocol = 0x6658
to be used when OvS sends and receives a packet but that internally
OvS uses the type that the packet would have been if an ethernet header was
present. And that type is stored in skb->protocol.

This seems to work for the current l3-vpn (e.g. MPLS over GRE) work.
It may need to be updated along the lines you suggest for NSH.
diff mbox

Patch

diff --git a/lib/match.c b/lib/match.c
index db78831..4eab67f 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -1031,6 +1031,52 @@  format_flow_tunnel(struct ds *s, const struct match *match)
 }
 
 static void
+format_flow_nsh(struct ds *s, const struct match *match)
+{
+    const struct flow_wildcards *wc = &match->wc;
+    const struct flow_nsh *nsh = &match->flow.nsh;
+
+    if (nsh->nsp) {
+        format_be32_masked(s, "nsp", nsh->nsp, wc->masks.nsh.nsp);
+        ds_put_format(s, "nsi=%"PRIu8",", nsh->nsi);
+
+        if (wc->masks.nsh.flags) {
+            format_be32_masked(s, "flags", nsh->flags, wc->masks.nsh.flags);
+        }
+        if (wc->masks.nsh.md_type) {
+            ds_put_format(s, "md_type=%"PRIu8",", nsh->md_type);
+        }
+        if (wc->masks.nsh.next_proto) {
+            ds_put_format(s, "next_proto=%"PRIu8",", nsh->next_proto);
+        }
+
+        if(nsh->md_type == 0x1) { /* NSH_MD_TYPE1 */
+            if (wc->masks.nsh.nshc1) {
+                format_be32_masked(s, "nshc1", nsh->nshc1,
+                                   wc->masks.nsh.nshc1);
+            }
+            if (wc->masks.nsh.nshc2) {
+                format_be32_masked(s, "nshc2", nsh->nshc2,
+                                   wc->masks.nsh.nshc2);
+            }
+            if (wc->masks.nsh.nshc3) {
+                format_be32_masked(s, "nshc3", nsh->nshc3,
+                                   wc->masks.nsh.nshc3);
+            }
+            if (wc->masks.nsh.nshc4) {
+                format_be32_masked(s, "nshc4", nsh->nshc4,
+                                   wc->masks.nsh.nshc4);
+            }
+#if 0
+        } else if (nsh->md_type == 0x2) { /* NSH_MD_TYPE2 */
+            /* Prototype of MD type 2 with TUN_METADATA APIs. */
+            tun_metadata_match_format(s, match);
+#endif
+        }
+    }
+}
+
+static void
 format_ct_label_masked(struct ds *s, const ovs_u128 *key, const ovs_u128 *mask)
 {
     if (!ovs_u128_is_zero(*mask)) {
@@ -1187,6 +1233,8 @@  match_format(const struct match *match, struct ds *s, int priority)
 
     format_be64_masked(s, "metadata", f->metadata, wc->masks.metadata);
 
+    format_flow_nsh(s, match);
+
     if (wc->masks.in_port.ofp_port) {
         ds_put_format(s, "%sin_port=%s", colors.param, colors.end);
         ofputil_format_port(f->in_port.ofp_port, s);