diff mbox

[ovs-dev,v4,2/3] nsh: add new flow key 'ttl'

Message ID 1503633771-112384-3-git-send-email-yi.y.yang@intel.com
State Changes Requested
Headers show

Commit Message

Yang, Yi Aug. 25, 2017, 4:02 a.m. UTC
IETF NSH draft will be approved by end of August, NSH header
format has been finalized and won't be change anymore, so we
need to follow this final spec to implement nsh.

kernel data path also needs finalized uAPIs, they can't be
changed once they are merged.

This patch adds new nsh key 'ttl', bits of flags and mdtype
fields are also changed to follow the final spec.

A new action dec_nsh_ttl will be added in the following patch.

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
---
 datapath/linux/compat/include/linux/openvswitch.h |  17 +-
 include/openvswitch/flow.h                        |   6 +-
 include/openvswitch/meta-flow.h                   |  31 +-
 include/openvswitch/nsh.h                         | 326 ++++++++++++++++++++--
 include/openvswitch/packets.h                     |  18 +-
 lib/flow.c                                        |  64 ++---
 lib/flow.h                                        |   2 +-
 lib/match.c                                       |  12 +-
 lib/meta-flow.c                                   |  56 +++-
 lib/meta-flow.xml                                 |   6 +-
 lib/nx-match.c                                    |  16 +-
 lib/odp-execute.c                                 |  29 +-
 lib/odp-util.c                                    | 152 +++++-----
 lib/packets.c                                     |   5 +-
 ofproto/ofproto-dpif-xlate.c                      |  11 +-
 tests/nsh.at                                      |  46 +--
 16 files changed, 555 insertions(+), 242 deletions(-)

Comments

Jan Scheurich Aug. 28, 2017, 11:31 p.m. UTC | #1
Hi Yi,

Thanks for the fixes. Please find my individual comments inline.

BR, Jan

> -----Original Message-----
> -#define OVS_ENCAP_NSH_MAX_MD_LEN 16
>  /*
>   * struct ovs_action_encap_nsh - %OVS_ACTION_ATTR_ENCAP_NSH
>   * @flags: NSH header flags.
> + * @ttl: NSH header TTL.
>   * @mdtype: NSH metadata type.
>   * @mdlen: Length of NSH metadata in bytes.
>   * @np: NSH next_protocol: Inner packet type.
> @@ -805,11 +796,13 @@ struct ovs_action_push_eth {
>   */
>  struct ovs_action_encap_nsh {
>      uint8_t flags;
> +    uint8_t ttl;
>      uint8_t mdtype;
> -    uint8_t mdlen;
>      uint8_t np;
>      __be32 path_hdr;
> -    uint8_t metadata[OVS_ENCAP_NSH_MAX_MD_LEN];
> +    uint8_t mdlen;
> +    uint8_t pad[7]; /* Aligned to 64 bit boundary for metadata */
> +    uint8_t metadata[];
>  };

Alignment of metadata to 32 bits should suffice here. Netlink attrs are only 4-byte aligned anyhow. The same goes for the MD1 context headers inside metadata.

> diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
> index a658a58..cd61fff 100644
> --- a/include/openvswitch/flow.h
> +++ b/include/openvswitch/flow.h
> @@ -146,7 +146,7 @@ struct flow {
>      struct eth_addr arp_tha;    /* ARP/ND target hardware address. */
>      ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4. */
>      ovs_be16 pad2;              /* Pad to 64 bits. */
> -    struct flow_nsh nsh;        /* Network Service Header keys */
> +    struct ovs_key_nsh nsh;     /* Network Service Header keys */

Why rename this type to struct ovs_key_nsh?

> diff --git a/include/openvswitch/nsh.h b/include/openvswitch/nsh.h
[...]
> @@ -62,7 +210,7 @@ struct nsh_md2_tlv {
>  };
> 
>  struct nsh_hdr {
> -    ovs_be16 ver_flags_len;
> +    ovs_be16 ver_flags_ttl_len;
>      uint8_t md_type;
>      uint8_t next_proto;
>      ovs_16aligned_be32 path_hdr;
> @@ -75,11 +223,16 @@ struct nsh_hdr {
>  /* Masking NSH header fields. */
>  #define NSH_VER_MASK       0xc000
>  #define NSH_VER_SHIFT      14
> -#define NSH_FLAGS_MASK     0x3fc0
> -#define NSH_FLAGS_SHIFT    6
> +#define NSH_FLAGS_MASK     0x3000
> +#define NSH_FLAGS_SHIFT    12

With this definition of NSH FLAGS the match field NSH_FLAGS covers
only the two bits between Version and TTL field. Bit 1 is the OaM flag, 
bit 0 is currently unused. The remaining 4 unused bits between Length
and MD Type fields are excluded. That is perhaps OK as we cannot be
sure the these 4 bits will be used as "flags" in the future. But it needs
to be documented clearly that NSH_FLAGS has only two bits and which
bits in the NSH header it covers.

> +static inline void
> +__nsh_set_xflag(struct nsh_hdr *nsh, uint16_t xflag, uint16_t xmask)
> +{
> +    nsh->ver_flags_ttl_len
> +        = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
> +}

Avoid __* prefix. Better call it nsh_set_base_hdr_masked(). 

> +
> +static inline void nsh_set_flags_and_ttl(struct nsh_hdr *nsh, uint8_t flags,
> +                                         uint8_t ttl)
> +{
> +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> NSH_FLAGS_MASK) |
> +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
> +                    NSH_FLAGS_MASK | NSH_TTL_MASK);
> +}
> +
> +static inline void nsh_set_flags_ttl_len(struct nsh_hdr *nsh, uint8_t flags,
> +                                         uint8_t ttl, uint8_t len)
> +{
> +    len = len >> 2;
> +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> NSH_FLAGS_MASK) |
> +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
> +                         ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
> +                    NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
> +}

Do you really need to these hybrid functions? Why not use modular functions nsh_set_flags(), nsh_set_ttl() and nsh_set_len()? I believe the run-time overhead is negligible with an optimizing compiler.

For symmetry you should also add a function setter function for path_hdr:

static inline void
nsh_set_path_hdr(struct nsh_hdr *nsh, ovs_be32 path_hdr)
{
    put_16aligned_be32(&nsh->path_hdr, path_hdr);
}

> diff --git a/include/openvswitch/packets.h b/include/openvswitch/packets.h
> index be91e02..603ec44 100644
> --- a/include/openvswitch/packets.h
> +++ b/include/openvswitch/packets.h
>
>  /* Network Service Header keys */
> -struct flow_nsh {
> +struct ovs_key_nsh {
>      uint8_t flags;
> +    uint8_t ttl;
>      uint8_t mdtype;
>      uint8_t np;
> -    uint8_t si;
> -    ovs_be32 spi;
> +    ovs_be32 path_hdr;
>      ovs_be32 c[4];
>  };

Why did you rename this struct? As part of the struct flow, it was
more appropriately called struct flow_nsh.

> diff --git a/lib/flow.c b/lib/flow.c
> index b2b10aa..547ff70 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
>  bool
> -parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key)
> +parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
>  {
[...]
> -    /* Check if it is long enough for NSH header, doesn't support
> -     * MD type 2 yet
> -     */
>      if (OVS_UNLIKELY(*sizep < NSH_M_TYPE1_LEN)) {
>          return false;
>      }

This should compare against the minimum fixed size (NSH_BASE_HDR_LEN)
of an NSH header, not NSH_M_TYPE1_LEN as we want to successfully parse 
any MD type header.

>      switch (key->mdtype) {
>          case NSH_M_TYPE1:
> +            if (length != NSH_M_TYPE1_LEN) {
> +                return false;
> +            }
>              for (size_t i = 0; i < 4; i++) {
>                  key->c[i] = get_16aligned_be32(&nsh->md1.c[i]);
>              }
>              break;
>          case NSH_M_TYPE2:
> -            /* Don't support MD type 2 yet, so return false */
> +            /* Don't support MD type 2 metedata parsing yet */
> +            if (length < NSH_BASE_HDR_LEN) {
> +                return false;
> +            }

This check is superfluous as it has been done before.

> diff --git a/lib/meta-flow.c b/lib/meta-flow.c
> index 64a8cf1..b50679c 100644
> --- a/lib/meta-flow.c
> +++ b/lib/meta-flow.c
> @@ -906,10 +914,13 @@ mf_get_value(const struct mf_field *mf, const
> struct flow *flow,
>          value->u8 = flow->nsh.np;
>          break;
>      case MFF_NSH_SPI:
> -        value->be32 = flow->nsh.spi;
> +        value->be32 = nsh_path_hdr_to_spi(flow->nsh.path_hdr);
> +        if (value->be32 == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> +            value->be32 = OVS_BE32_MAX;
> +        }

This looks strange. The SPI is a 24 bit field. Why would you expand it to OVS_BE32_MAX?

> @@ -1221,10 +1235,12 @@ mf_set_value(const struct mf_field *mf,
>          MATCH_SET_FIELD_UINT8(match, nsh.np, value->u8);
>          break;
>      case MFF_NSH_SPI:
> -        MATCH_SET_FIELD_BE32(match, nsh.spi, value->be32);
> +        match->wc.masks.nsh.path_hdr |= htonl(NSH_SPI_MASK);
> +        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr, value->be32);

Introduce a wrapper function match_set_nsh_spi() where you rely on the setter functions in nsh.h to manipulate the path_hdr.

>          break;
>      case MFF_NSH_SI:
> -        MATCH_SET_FIELD_UINT8(match, nsh.si, value->u8);
> +        match->wc.masks.nsh.path_hdr |= htonl(NSH_SI_MASK);
> +        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr, value->u8);
>          break;

Dito.

> @@ -2103,10 +2126,12 @@ mf_set_wild(const struct mf_field *mf, struct
> match *match, char **err_str)
>          MATCH_SET_FIELD_MASKED(match, nsh.np, 0, 0);
>          break;
>      case MFF_NSH_SPI:
> -        MATCH_SET_FIELD_MASKED(match, nsh.spi, htonl(0), htonl(0));
> +        match->wc.masks.nsh.path_hdr &= ~htonl(NSH_SPI_MASK);
> +        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr, htonl(0));


Dito.

>          break;
>      case MFF_NSH_SI:
> -        MATCH_SET_FIELD_MASKED(match, nsh.si, 0, 0);
> +        match->wc.masks.nsh.path_hdr &= ~htonl(NSH_SI_MASK);
> +        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr, 0);

Dito.

>          break;
>      case MFF_NSH_C1:
>      case MFF_NSH_C2:

> @@ -2363,10 +2391,14 @@ mf_set(const struct mf_field *mf,
>          MATCH_SET_FIELD_MASKED(match, nsh.np, value->u8, mask->u8);
>          break;
>      case MFF_NSH_SPI:
> -        MATCH_SET_FIELD_MASKED(match, nsh.spi, value->be32, mask-
> >be32);
> +        match->wc.masks.nsh.path_hdr |= mask->be32;

I believe this is incorrect. The mask is related to the value and requires shifting.
One more reason to use wrapper functions.

> +        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr,
> +                             value->be32 & mask->be32);
>          break;
>      case MFF_NSH_SI:
> -        MATCH_SET_FIELD_MASKED(match, nsh.si, value->u8, mask->u8);
> +        match->wc.masks.nsh.path_hdr |= htonl(mask->u8);

Dito.

> +        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr,
> +                             value->u8 & mask->u8);

> diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml
> index 065fb03..a4a0735 100644
> --- a/lib/meta-flow.xml
> +++ b/lib/meta-flow.xml
> @@ -1311,7 +1311,9 @@ tcp,tp_src=0x07c0/0xfff0
> 
>    <group title="Network Service Header">
>      <field id="MFF_NSH_FLAGS"
> -        title="flags field (8 bits)"/>
> +        title="flags field (2 bits)"/>

The NSH_FLAGS field requires more explanation as it is not defined in the RFC.

> diff --git a/lib/nx-match.c b/lib/nx-match.c
> index b782e8c..dc52566 100644
> --- a/lib/nx-match.c
> +++ b/lib/nx-match.c
> @@ -1157,13 +1158,22 @@ nx_put_raw(struct ofpbuf *b, enum
> ofp_version oxm, const struct match *match,
>      /* Network Service Header */
>      nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags,
>              match->wc.masks.nsh.flags);
> +    nxm_put_8m(&ctx, MFF_NSH_TTL, oxm, flow->nsh.ttl,
> +            match->wc.masks.nsh.ttl);
>      nxm_put_8m(&ctx, MFF_NSH_MDTYPE, oxm, flow->nsh.mdtype,
>              match->wc.masks.nsh.mdtype);
>      nxm_put_8m(&ctx, MFF_NSH_NP, oxm, flow->nsh.np,
>              match->wc.masks.nsh.np);
> -    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm, flow->nsh.spi,
> -                match->wc.masks.nsh.spi);
> -    nxm_put_8m(&ctx, MFF_NSH_SI, oxm, flow->nsh.si, match-
> >wc.masks.nsh.si);
> +    spi_mask = nsh_path_hdr_to_spi(match->wc.masks.nsh.path_hdr);
> +    if (spi_mask == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> +        spi_mask = OVS_BE32_MAX;
> +    }
> +    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm,
> +                nsh_path_hdr_to_spi(flow->nsh.path_hdr),
> +                spi_mask);

I think it is not correct to expand a 24 bit all-ones mask for the SPI field to a full 32-bit all-ones mask. Compare to the MFF_IPV6_LABEL case which is defined as a 20-bit field in a 4-byte OXM. The codes just puts the mask as is.

> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 5f4d23a..601c8b6 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -277,10 +277,10 @@ odp_set_nsh(struct dp_packet *packet, const
> struct ovs_key_nsh *key,
>              const struct ovs_key_nsh *mask)
>  {
>      struct nsh_hdr *nsh = dp_packet_l3(packet);
> +    ovs_be32 path_hdr;
> 
>      if (!mask) {
> -        nsh->ver_flags_len = htons(key->flags << NSH_FLAGS_SHIFT) |
> -                             (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
> +        nsh_set_flags_and_ttl(nsh, key->flags, key->ttl);
>          put_16aligned_be32(&nsh->path_hdr, key->path_hdr);
>          switch (nsh->md_type) {
>              case NSH_M_TYPE1:
> @@ -294,15 +294,28 @@ odp_set_nsh(struct dp_packet *packet, const
[...]
>      } else {
> -        uint8_t flags = (ntohs(nsh->ver_flags_len) & NSH_FLAGS_MASK) >>
> -                            NSH_FLAGS_SHIFT;
> +        uint8_t flags = nsh_get_flags(nsh);
> +        uint8_t ttl = nsh_get_ttl(nsh);
> +
>          flags = key->flags | (flags & ~mask->flags);
> -        nsh->ver_flags_len = htons(flags << NSH_FLAGS_SHIFT) |
> -                             (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
> +        ttl = key->ttl | (ttl & ~mask->ttl);
> +        nsh_set_flags_and_ttl (nsh, flags, ttl);
> +
> +        uint32_t spi = ntohl(nsh_get_spi(nsh));
> +        uint8_t si = nsh_get_si(nsh);
> +        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask->path_hdr);
> +        uint8_t si_mask = nsh_path_hdr_to_si(mask->path_hdr);
> 
> -        ovs_be32 path_hdr = get_16aligned_be32(&nsh->path_hdr);
> -        path_hdr = key->path_hdr | (path_hdr & ~mask->path_hdr);
> +        if (spi_mask == 0x00ffffff) {
> +            spi_mask = UINT32_MAX;
> +        }
> +        spi = nsh_path_hdr_to_spi_uint32(key->path_hdr) | (spi &
> ~spi_mask);
> +        si = nsh_path_hdr_to_si(key->path_hdr) | (si & ~si_mask);
> +        path_hdr = nsh_get_path_hdr(nsh);
> +        nsh_path_hdr_set_spi(&path_hdr, htonl(spi));
> +        nsh_path_hdr_set_si(&path_hdr, si);
>          put_16aligned_be32(&nsh->path_hdr, path_hdr);

The above is quite obscure and far more complicated than necessary.
Could be simplified as follows: 

        uint8_t flags = nsh_get_flags(nsh);
        uint8_t ttl = nsh_get_ttl(nsh);
        ovs_be32 path_hdr = nsh_get_path_hdr(nsh);

        flags = key->flags | (flags & ~mask->flags);
        ttl = key->ttl | (ttl & ~mask->ttl);
        path_hdr = key->path_hdr | (path_hdr & ~mask->path_hdr);

        nsh_set_flags(nsh, flags);
        nsh_set_ttl(nsh, ttl);
        nsh_set_path_hdr(nsh, path_hdr);

> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index 4f1499e..ac286ea 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> @@ -319,17 +320,16 @@ format_nsh_key_mask(struct ds *ds, const struct
> ovs_key_nsh *key,
>          format_nsh_key(ds, key);
>      } else {
>          bool first = true;
> -        uint32_t spi = (ntohl(key->path_hdr) & NSH_SPI_MASK) >>
> NSH_SPI_SHIFT;
> -        uint32_t spi_mask = (ntohl(mask->path_hdr) & NSH_SPI_MASK) >>
> -                                NSH_SPI_SHIFT;
> -        if (spi_mask == 0x00ffffff) {
> +        uint32_t spi = nsh_path_hdr_to_spi_uint32(key->path_hdr);
> +        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask->path_hdr);
> +        if (spi_mask == (NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
>              spi_mask = UINT32_MAX;
>          }

I understand that you expand the mask here to 32 bits to be able to use format_be32_masked helper below, but it is confusing anyhow. Perhaps it
would be clearer to introduce a dedicated formatting function for SPI.

> -        uint8_t si = (ntohl(key->path_hdr) & NSH_SI_MASK) >> NSH_SI_SHIFT;
> -        uint8_t si_mask = (ntohl(mask->path_hdr) & NSH_SI_MASK) >>
> -                              NSH_SI_SHIFT;
> +        uint8_t si = nsh_path_hdr_to_si(key->path_hdr);
> +        uint8_t si_mask = nsh_path_hdr_to_si(mask->path_hdr);
> 
>          format_uint8_masked(ds, &first, "flags", key->flags, mask->flags);
> +        format_uint8_masked(ds, &first, "ttl", key->ttl, mask->ttl);
>          format_uint8_masked(ds, &first, "mdtype", key->mdtype, mask-
> >mdtype);
>          format_uint8_masked(ds, &first, "np", key->np, mask->np);
>          format_be32_masked(ds, &first, "spi", htonl(spi), htonl(spi_mask));

> @@ -1808,17 +1810,20 @@ parse_odp_encap_nsh_action(const char *s,
> struct ofpbuf *actions)
>              break;
>          }
> 
> -        if (ovs_scan_len(s, &n, "flags=%"SCNi8, &encap_nsh.flags)) {
> +        if (ovs_scan_len(s, &n, "flags=%"SCNi8, &encap_nsh->flags)) {
>              continue;

Any validity check for value flags<=3?

>          }
> -        if (ovs_scan_len(s, &n, "mdtype=%"SCNi8, &encap_nsh.mdtype)) {
> -            switch (encap_nsh.mdtype) {
> +        if (ovs_scan_len(s, &n, "ttl=%"SCNi8, &encap_nsh->ttl)) {
> +            continue;

Any validity check for ttl < 64?

> @@ -1826,24 +1831,24 @@ parse_odp_encap_nsh_action(const char *s,
> struct ofpbuf *actions)
>              }
>              continue;
>          }
> -        if (ovs_scan_len(s, &n, "np=%"SCNi8, &encap_nsh.np)) {
> +        if (ovs_scan_len(s, &n, "np=%"SCNi8, &encap_nsh->np)) {
>              continue;
>          }
>          if (ovs_scan_len(s, &n, "spi=0x%"SCNx32, &spi)) {
> -            encap_nsh.path_hdr =
> +            encap_nsh->path_hdr =
>                      htonl(((spi << NSH_SPI_SHIFT) & NSH_SPI_MASK) |
> -                            (ntohl(encap_nsh.path_hdr) & ~NSH_SPI_MASK));
> +                            (ntohl(encap_nsh->path_hdr) & ~NSH_SPI_MASK));
>              continue;

Why not use the access functions in nsh.h here?

>          }
>          if (ovs_scan_len(s, &n, "si=%"SCNi8, &si)) {
> -            encap_nsh.path_hdr =
> +            encap_nsh->path_hdr =
>                      htonl((si << NSH_SI_SHIFT) |
> -                            (ntohl(encap_nsh.path_hdr) & ~NSH_SI_MASK));
> +                            (ntohl(encap_nsh->path_hdr) & ~NSH_SI_MASK));
>              continue;

Dito.

> @@ -1861,28 +1866,29 @@ parse_odp_encap_nsh_action(const char *s,
> struct ofpbuf *actions)
>                  continue;
>              }
>          }
> -        else if (encap_nsh.mdtype == NSH_M_TYPE2) {
> +        else if (encap_nsh->mdtype == NSH_M_TYPE2) {
>              struct ofpbuf b;
>              char buf[512];
>              size_t mdlen;
>              if (ovs_scan_len(s, &n, "md2=0x%511[0-9a-fA-F]", buf)) {
> -                ofpbuf_use_stub(&b, encap_nsh.metadata,
> -                                OVS_ENCAP_NSH_MAX_MD_LEN);
> +                ofpbuf_use_stub(&b, encap_nsh->metadata,
> +                                NSH_CTX_HDRS_MAX_LEN);
>                  ofpbuf_put_hex(&b, buf, &mdlen);

Here it might be necessary to pad the metadata buffer with zeros to 4 bytes.

> @@ -6798,19 +6774,20 @@ odp_put_encap_nsh_action(struct ofpbuf
> *odp_actions,
[...]
> -    switch (encap_nsh.mdtype) {
> +    switch (encap_nsh->mdtype) {
>      case NSH_M_TYPE1: {
>          struct nsh_md1_ctx *md1 =
> -            ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh.metadata);
> -        encap_nsh.mdlen = NSH_M_TYPE1_MDLEN;
> +            ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh->metadata);
> +        encap_nsh->mdlen = NSH_M_TYPE1_MDLEN;
>          for (int i = 0; i < 4; i++) {
>              put_16aligned_be32(&md1->c[i], flow->nsh.c[i]);

No need for put_16aligned_be32() here as the encap_nsh action and the metadata included are 8-byte aligned.

> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 9e1f837..1b99ad5 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -5885,10 +5885,10 @@ rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
>      /* Populate the flow with the new NSH header. */
>      flow->packet_type = htonl(PT_NSH);
>      flow->dl_type = htons(ETH_TYPE_NSH);
> -    flow->nsh.flags = 0;    /* */
> +    flow->nsh.flags = 0;
> +    flow->nsh.ttl = 63;
>      flow->nsh.np = np;
> -    flow->nsh.spi = 0;
> -    flow->nsh.si = 255;
> +    flow->nsh.path_hdr = htonl(255);

Use nsh setter functions instead?

> @@ -5901,6 +5901,7 @@ rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
>      } else if (md_type == NSH_M_TYPE2) {
>          flow->nsh.mdtype = NSH_M_TYPE2;
>      }
> +    flow->nsh.mdtype &= NSH_MDTYPE_MASK;

Not really needed here, as we set the mdtype to a defined value.

I didn't check the unit test adaptations to the introduction of ttl field.

BR, Jan
Yang, Yi Aug. 29, 2017, 6:30 a.m. UTC | #2
On Tue, Aug 29, 2017 at 07:31:22AM +0800, Jan Scheurich wrote:
> Hi Yi,
> 
> Thanks for the fixes. Please find my individual comments inline.
> 
> BR, Jan
> 
> > -----Original Message-----
> > -#define OVS_ENCAP_NSH_MAX_MD_LEN 16
> >  /*
> >   * struct ovs_action_encap_nsh - %OVS_ACTION_ATTR_ENCAP_NSH
> >   * @flags: NSH header flags.
> > + * @ttl: NSH header TTL.
> >   * @mdtype: NSH metadata type.
> >   * @mdlen: Length of NSH metadata in bytes.
> >   * @np: NSH next_protocol: Inner packet type.
> > @@ -805,11 +796,13 @@ struct ovs_action_push_eth {
> >   */
> >  struct ovs_action_encap_nsh {
> >      uint8_t flags;
> > +    uint8_t ttl;
> >      uint8_t mdtype;
> > -    uint8_t mdlen;
> >      uint8_t np;
> >      __be32 path_hdr;
> > -    uint8_t metadata[OVS_ENCAP_NSH_MAX_MD_LEN];
> > +    uint8_t mdlen;
> > +    uint8_t pad[7]; /* Aligned to 64 bit boundary for metadata */
> > +    uint8_t metadata[];
> >  };
> 
> Alignment of metadata to 32 bits should suffice here. Netlink attrs are only 4-byte aligned anyhow. The same goes for the MD1 context headers inside metadata.
>

Ok, I will change it to pad[3].

> > diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
> > index a658a58..cd61fff 100644
> > --- a/include/openvswitch/flow.h
> > +++ b/include/openvswitch/flow.h
> > @@ -146,7 +146,7 @@ struct flow {
> >      struct eth_addr arp_tha;    /* ARP/ND target hardware address. */
> >      ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4. */
> >      ovs_be16 pad2;              /* Pad to 64 bits. */
> > -    struct flow_nsh nsh;        /* Network Service Header keys */
> > +    struct ovs_key_nsh nsh;     /* Network Service Header keys */
> 
> Why rename this type to struct ovs_key_nsh?

I have explained it in previous review, new ttl field results in it
isn't 64 bit aligned, moreover, we musnt't do some unnecessary
conversion if we use struct ovs_key_nsh, netlink also used struct
ovs_key_nsh, it is very reasonable to use struct ovs_key_nsh for this,
why don't we use it?

> 
> > diff --git a/include/openvswitch/nsh.h b/include/openvswitch/nsh.h
> [...]
> > @@ -62,7 +210,7 @@ struct nsh_md2_tlv {
> >  };
> > 
> >  struct nsh_hdr {
> > -    ovs_be16 ver_flags_len;
> > +    ovs_be16 ver_flags_ttl_len;
> >      uint8_t md_type;
> >      uint8_t next_proto;
> >      ovs_16aligned_be32 path_hdr;
> > @@ -75,11 +223,16 @@ struct nsh_hdr {
> >  /* Masking NSH header fields. */
> >  #define NSH_VER_MASK       0xc000
> >  #define NSH_VER_SHIFT      14
> > -#define NSH_FLAGS_MASK     0x3fc0
> > -#define NSH_FLAGS_SHIFT    6
> > +#define NSH_FLAGS_MASK     0x3000
> > +#define NSH_FLAGS_SHIFT    12
> 
> With this definition of NSH FLAGS the match field NSH_FLAGS covers
> only the two bits between Version and TTL field. Bit 1 is the OaM flag, 
> bit 0 is currently unused. The remaining 4 unused bits between Length
> and MD Type fields are excluded. That is perhaps OK as we cannot be
> sure the these 4 bits will be used as "flags" in the future. But it needs
> to be documented clearly that NSH_FLAGS has only two bits and which
> bits in the NSH header it covers.

Make sense, we can add more information for this in lib/meta-flow.xml.

> 
> > +static inline void
> > +__nsh_set_xflag(struct nsh_hdr *nsh, uint16_t xflag, uint16_t xmask)
> > +{
> > +    nsh->ver_flags_ttl_len
> > +        = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
> > +}
> 
> Avoid __* prefix. Better call it nsh_set_base_hdr_masked(). 

We just use __ to mark it as an internal function which will not be used
outside of nsh.h, this is C coding convention.

> 
> > +
> > +static inline void nsh_set_flags_and_ttl(struct nsh_hdr *nsh, uint8_t flags,
> > +                                         uint8_t ttl)
> > +{
> > +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> > NSH_FLAGS_MASK) |
> > +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
> > +                    NSH_FLAGS_MASK | NSH_TTL_MASK);
> > +}
> > +
> > +static inline void nsh_set_flags_ttl_len(struct nsh_hdr *nsh, uint8_t flags,
> > +                                         uint8_t ttl, uint8_t len)
> > +{
> > +    len = len >> 2;
> > +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> > NSH_FLAGS_MASK) |
> > +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
> > +                         ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
> > +                    NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
> > +}
> 
> Do you really need to these hybrid functions? Why not use modular functions nsh_set_flags(), nsh_set_ttl() and nsh_set_len()? I believe the run-time overhead is negligible with an optimizing compiler.

Basically these three functions will be executed together, so one is
better.

> 
> For symmetry you should also add a function setter function for path_hdr:

I think put_16aligned_be32(&nsh->path_hdr, path_hdr) is better than
nsh_set_path_hdr, here isn't C++ programming :-)

> 
> static inline void
> nsh_set_path_hdr(struct nsh_hdr *nsh, ovs_be32 path_hdr)
> {
>     put_16aligned_be32(&nsh->path_hdr, path_hdr);
> }
> 
> > diff --git a/include/openvswitch/packets.h b/include/openvswitch/packets.h
> > index be91e02..603ec44 100644
> > --- a/include/openvswitch/packets.h
> > +++ b/include/openvswitch/packets.h
> >
> >  /* Network Service Header keys */
> > -struct flow_nsh {
> > +struct ovs_key_nsh {
> >      uint8_t flags;
> > +    uint8_t ttl;
> >      uint8_t mdtype;
> >      uint8_t np;
> > -    uint8_t si;
> > -    ovs_be32 spi;
> > +    ovs_be32 path_hdr;
> >      ovs_be32 c[4];
> >  };
> 
> Why did you rename this struct? As part of the struct flow, it was
> more appropriately called struct flow_nsh.

It isn't name issue, we want to use common struct ovs_key_nsh in
multiple function modules.

> 
> > diff --git a/lib/flow.c b/lib/flow.c
> > index b2b10aa..547ff70 100644
> > --- a/lib/flow.c
> > +++ b/lib/flow.c
> >  bool
> > -parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key)
> > +parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
> >  {
> [...]
> > -    /* Check if it is long enough for NSH header, doesn't support
> > -     * MD type 2 yet
> > -     */
> >      if (OVS_UNLIKELY(*sizep < NSH_M_TYPE1_LEN)) {
> >          return false;
> >      }
> 
> This should compare against the minimum fixed size (NSH_BASE_HDR_LEN)
> of an NSH header, not NSH_M_TYPE1_LEN as we want to successfully parse 
> any MD type header.

My bad, I will use NSH_BASE_HDR_LEN.

> 
> >      switch (key->mdtype) {
> >          case NSH_M_TYPE1:
> > +            if (length != NSH_M_TYPE1_LEN) {
> > +                return false;
> > +            }
> >              for (size_t i = 0; i < 4; i++) {
> >                  key->c[i] = get_16aligned_be32(&nsh->md1.c[i]);
> >              }
> >              break;
> >          case NSH_M_TYPE2:
> > -            /* Don't support MD type 2 yet, so return false */
> > +            /* Don't support MD type 2 metedata parsing yet */
> > +            if (length < NSH_BASE_HDR_LEN) {
> > +                return false;
> > +            }
> 
> This check is superfluous as it has been done before.

Yeah, I'll remove it.

> 
> > diff --git a/lib/meta-flow.c b/lib/meta-flow.c
> > index 64a8cf1..b50679c 100644
> > --- a/lib/meta-flow.c
> > +++ b/lib/meta-flow.c
> > @@ -906,10 +914,13 @@ mf_get_value(const struct mf_field *mf, const
> > struct flow *flow,
> >          value->u8 = flow->nsh.np;
> >          break;
> >      case MFF_NSH_SPI:
> > -        value->be32 = flow->nsh.spi;
> > +        value->be32 = nsh_path_hdr_to_spi(flow->nsh.path_hdr);
> > +        if (value->be32 == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> > +            value->be32 = OVS_BE32_MAX;
> > +        }
> 
> This looks strange. The SPI is a 24 bit field. Why would you expand it to OVS_BE32_MAX?

Indeed, 24 bit mask is enough, I'll remove it.

> 
> > @@ -1221,10 +1235,12 @@ mf_set_value(const struct mf_field *mf,
> >          MATCH_SET_FIELD_UINT8(match, nsh.np, value->u8);
> >          break;
> >      case MFF_NSH_SPI:
> > -        MATCH_SET_FIELD_BE32(match, nsh.spi, value->be32);
> > +        match->wc.masks.nsh.path_hdr |= htonl(NSH_SPI_MASK);
> > +        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr, value->be32);
> 
> Introduce a wrapper function match_set_nsh_spi() where you rely on the setter functions in nsh.h to manipulate the path_hdr.

Make sense, I have done in new version.

> 
> >          break;
> >      case MFF_NSH_SI:
> > -        MATCH_SET_FIELD_UINT8(match, nsh.si, value->u8);
> > +        match->wc.masks.nsh.path_hdr |= htonl(NSH_SI_MASK);
> > +        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr, value->u8);
> >          break;
> 
> Dito.
> 
> > @@ -2103,10 +2126,12 @@ mf_set_wild(const struct mf_field *mf, struct
> > match *match, char **err_str)
> >          MATCH_SET_FIELD_MASKED(match, nsh.np, 0, 0);
> >          break;
> >      case MFF_NSH_SPI:
> > -        MATCH_SET_FIELD_MASKED(match, nsh.spi, htonl(0), htonl(0));
> > +        match->wc.masks.nsh.path_hdr &= ~htonl(NSH_SPI_MASK);
> > +        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr, htonl(0));
> 
> 
> Dito.
> 
> >          break;
> >      case MFF_NSH_SI:
> > -        MATCH_SET_FIELD_MASKED(match, nsh.si, 0, 0);
> > +        match->wc.masks.nsh.path_hdr &= ~htonl(NSH_SI_MASK);
> > +        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr, 0);
> 
> Dito.
> 
> >          break;
> >      case MFF_NSH_C1:
> >      case MFF_NSH_C2:
> 
> > @@ -2363,10 +2391,14 @@ mf_set(const struct mf_field *mf,
> >          MATCH_SET_FIELD_MASKED(match, nsh.np, value->u8, mask->u8);
> >          break;
> >      case MFF_NSH_SPI:
> > -        MATCH_SET_FIELD_MASKED(match, nsh.spi, value->be32, mask-
> > >be32);
> > +        match->wc.masks.nsh.path_hdr |= mask->be32;
> 
> I believe this is incorrect. The mask is related to the value and requires shifting.
> One more reason to use wrapper functions.

You're right, but 4 nsh unit tests are ok, I'm not sure why. I have used
wrapper function to do this in new version.

> 
> > +        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr,
> > +                             value->be32 & mask->be32);
> >          break;
> >      case MFF_NSH_SI:
> > -        MATCH_SET_FIELD_MASKED(match, nsh.si, value->u8, mask->u8);
> > +        match->wc.masks.nsh.path_hdr |= htonl(mask->u8);
> 
> Dito.
> 
> > +        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr,
> > +                             value->u8 & mask->u8);
> 
> > diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml
> > index 065fb03..a4a0735 100644
> > --- a/lib/meta-flow.xml
> > +++ b/lib/meta-flow.xml
> > @@ -1311,7 +1311,9 @@ tcp,tp_src=0x07c0/0xfff0
> > 
> >    <group title="Network Service Header">
> >      <field id="MFF_NSH_FLAGS"
> > -        title="flags field (8 bits)"/>
> > +        title="flags field (2 bits)"/>
> 
> The NSH_FLAGS field requires more explanation as it is not defined in the RFC.

Ok, I'll add more information here.

> 
> > diff --git a/lib/nx-match.c b/lib/nx-match.c
> > index b782e8c..dc52566 100644
> > --- a/lib/nx-match.c
> > +++ b/lib/nx-match.c
> > @@ -1157,13 +1158,22 @@ nx_put_raw(struct ofpbuf *b, enum
> > ofp_version oxm, const struct match *match,
> >      /* Network Service Header */
> >      nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags,
> >              match->wc.masks.nsh.flags);
> > +    nxm_put_8m(&ctx, MFF_NSH_TTL, oxm, flow->nsh.ttl,
> > +            match->wc.masks.nsh.ttl);
> >      nxm_put_8m(&ctx, MFF_NSH_MDTYPE, oxm, flow->nsh.mdtype,
> >              match->wc.masks.nsh.mdtype);
> >      nxm_put_8m(&ctx, MFF_NSH_NP, oxm, flow->nsh.np,
> >              match->wc.masks.nsh.np);
> > -    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm, flow->nsh.spi,
> > -                match->wc.masks.nsh.spi);
> > -    nxm_put_8m(&ctx, MFF_NSH_SI, oxm, flow->nsh.si, match-
> > >wc.masks.nsh.si);
> > +    spi_mask = nsh_path_hdr_to_spi(match->wc.masks.nsh.path_hdr);
> > +    if (spi_mask == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> > +        spi_mask = OVS_BE32_MAX;
> > +    }
> > +    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm,
> > +                nsh_path_hdr_to_spi(flow->nsh.path_hdr),
> > +                spi_mask);
> 
> I think it is not correct to expand a 24 bit all-ones mask for the SPI field to a full 32-bit all-ones mask. Compare to the MFF_IPV6_LABEL case which is defined as a 20-bit field in a 4-byte OXM. The codes just puts the mask as is.

MFF_IPV6_LABEL is different from this, MPLS_LABEL is same as this one.
But it will return BAD_MASK error if we don't expand it ot OVS_BE32_MAX
because mf_is_mask_valid expects mask is all one, the actual mask is
0x00FFFFFF. I have tried this many times, please do tell me a better solution
for this if you have a better one.

> 
> > diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> > index 5f4d23a..601c8b6 100644
> > --- a/lib/odp-execute.c
> > +++ b/lib/odp-execute.c
> > @@ -277,10 +277,10 @@ odp_set_nsh(struct dp_packet *packet, const
> > struct ovs_key_nsh *key,
> >              const struct ovs_key_nsh *mask)
> >  {
> >      struct nsh_hdr *nsh = dp_packet_l3(packet);
> > +    ovs_be32 path_hdr;
> > 
> >      if (!mask) {
> > -        nsh->ver_flags_len = htons(key->flags << NSH_FLAGS_SHIFT) |
> > -                             (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
> > +        nsh_set_flags_and_ttl(nsh, key->flags, key->ttl);
> >          put_16aligned_be32(&nsh->path_hdr, key->path_hdr);
> >          switch (nsh->md_type) {
> >              case NSH_M_TYPE1:
> > @@ -294,15 +294,28 @@ odp_set_nsh(struct dp_packet *packet, const
> [...]
> >      } else {
> > -        uint8_t flags = (ntohs(nsh->ver_flags_len) & NSH_FLAGS_MASK) >>
> > -                            NSH_FLAGS_SHIFT;
> > +        uint8_t flags = nsh_get_flags(nsh);
> > +        uint8_t ttl = nsh_get_ttl(nsh);
> > +
> >          flags = key->flags | (flags & ~mask->flags);
> > -        nsh->ver_flags_len = htons(flags << NSH_FLAGS_SHIFT) |
> > -                             (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
> > +        ttl = key->ttl | (ttl & ~mask->ttl);
> > +        nsh_set_flags_and_ttl (nsh, flags, ttl);
> > +
> > +        uint32_t spi = ntohl(nsh_get_spi(nsh));
> > +        uint8_t si = nsh_get_si(nsh);
> > +        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask->path_hdr);
> > +        uint8_t si_mask = nsh_path_hdr_to_si(mask->path_hdr);
> > 
> > -        ovs_be32 path_hdr = get_16aligned_be32(&nsh->path_hdr);
> > -        path_hdr = key->path_hdr | (path_hdr & ~mask->path_hdr);
> > +        if (spi_mask == 0x00ffffff) {
> > +            spi_mask = UINT32_MAX;
> > +        }
> > +        spi = nsh_path_hdr_to_spi_uint32(key->path_hdr) | (spi &
> > ~spi_mask);
> > +        si = nsh_path_hdr_to_si(key->path_hdr) | (si & ~si_mask);
> > +        path_hdr = nsh_get_path_hdr(nsh);
> > +        nsh_path_hdr_set_spi(&path_hdr, htonl(spi));
> > +        nsh_path_hdr_set_si(&path_hdr, si);
> >          put_16aligned_be32(&nsh->path_hdr, path_hdr);
> 
> The above is quite obscure and far more complicated than necessary.
> Could be simplified as follows: 
> 
>         uint8_t flags = nsh_get_flags(nsh);
>         uint8_t ttl = nsh_get_ttl(nsh);
>         ovs_be32 path_hdr = nsh_get_path_hdr(nsh);
> 
>         flags = key->flags | (flags & ~mask->flags);
>         ttl = key->ttl | (ttl & ~mask->ttl);
>         path_hdr = key->path_hdr | (path_hdr & ~mask->path_hdr);
> 
>         nsh_set_flags(nsh, flags);
>         nsh_set_ttl(nsh, ttl);
>         nsh_set_path_hdr(nsh, path_hdr);

I tried the one you provided, it is ok, so I changed it to your version
in new version.

> 
> > diff --git a/lib/odp-util.c b/lib/odp-util.c
> > index 4f1499e..ac286ea 100644
> > --- a/lib/odp-util.c
> > +++ b/lib/odp-util.c
> > @@ -319,17 +320,16 @@ format_nsh_key_mask(struct ds *ds, const struct
> > ovs_key_nsh *key,
> >          format_nsh_key(ds, key);
> >      } else {
> >          bool first = true;
> > -        uint32_t spi = (ntohl(key->path_hdr) & NSH_SPI_MASK) >>
> > NSH_SPI_SHIFT;
> > -        uint32_t spi_mask = (ntohl(mask->path_hdr) & NSH_SPI_MASK) >>
> > -                                NSH_SPI_SHIFT;
> > -        if (spi_mask == 0x00ffffff) {
> > +        uint32_t spi = nsh_path_hdr_to_spi_uint32(key->path_hdr);
> > +        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask->path_hdr);
> > +        if (spi_mask == (NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> >              spi_mask = UINT32_MAX;
> >          }
> 
> I understand that you expand the mask here to 32 bits to be able to use format_be32_masked helper below, but it is confusing anyhow. Perhaps it
> would be clearer to introduce a dedicated formatting function for SPI.

But a dedicated formatting function for SPI will have the same code as
format_be32_masked does, so we mustn't duplicate code.

> 
> > -        uint8_t si = (ntohl(key->path_hdr) & NSH_SI_MASK) >> NSH_SI_SHIFT;
> > -        uint8_t si_mask = (ntohl(mask->path_hdr) & NSH_SI_MASK) >>
> > -                              NSH_SI_SHIFT;
> > +        uint8_t si = nsh_path_hdr_to_si(key->path_hdr);
> > +        uint8_t si_mask = nsh_path_hdr_to_si(mask->path_hdr);
> > 
> >          format_uint8_masked(ds, &first, "flags", key->flags, mask->flags);
> > +        format_uint8_masked(ds, &first, "ttl", key->ttl, mask->ttl);
> >          format_uint8_masked(ds, &first, "mdtype", key->mdtype, mask-
> > >mdtype);
> >          format_uint8_masked(ds, &first, "np", key->np, mask->np);
> >          format_be32_masked(ds, &first, "spi", htonl(spi), htonl(spi_mask));
> 
> > @@ -1808,17 +1810,20 @@ parse_odp_encap_nsh_action(const char *s,
> > struct ofpbuf *actions)
> >              break;
> >          }
> > 
> > -        if (ovs_scan_len(s, &n, "flags=%"SCNi8, &encap_nsh.flags)) {
> > +        if (ovs_scan_len(s, &n, "flags=%"SCNi8, &encap_nsh->flags)) {
> >              continue;
> 
> Any validity check for value flags<=3?

Added check in new version.

> 
> >          }
> > -        if (ovs_scan_len(s, &n, "mdtype=%"SCNi8, &encap_nsh.mdtype)) {
> > -            switch (encap_nsh.mdtype) {
> > +        if (ovs_scan_len(s, &n, "ttl=%"SCNi8, &encap_nsh->ttl)) {
> > +            continue;
> 
> Any validity check for ttl < 64?

Added check in ne version.

> 
> > @@ -1826,24 +1831,24 @@ parse_odp_encap_nsh_action(const char *s,
> > struct ofpbuf *actions)
> >              }
> >              continue;
> >          }
> > -        if (ovs_scan_len(s, &n, "np=%"SCNi8, &encap_nsh.np)) {
> > +        if (ovs_scan_len(s, &n, "np=%"SCNi8, &encap_nsh->np)) {
> >              continue;
> >          }
> >          if (ovs_scan_len(s, &n, "spi=0x%"SCNx32, &spi)) {
> > -            encap_nsh.path_hdr =
> > +            encap_nsh->path_hdr =
> >                      htonl(((spi << NSH_SPI_SHIFT) & NSH_SPI_MASK) |
> > -                            (ntohl(encap_nsh.path_hdr) & ~NSH_SPI_MASK));
> > +                            (ntohl(encap_nsh->path_hdr) & ~NSH_SPI_MASK));
> >              continue;
> 
> Why not use the access functions in nsh.h here?

Forgot that, Ben wanted me to do changes as less as possible :-), I
haved use nsh helper in new version.

> 
> >          }
> >          if (ovs_scan_len(s, &n, "si=%"SCNi8, &si)) {
> > -            encap_nsh.path_hdr =
> > +            encap_nsh->path_hdr =
> >                      htonl((si << NSH_SI_SHIFT) |
> > -                            (ntohl(encap_nsh.path_hdr) & ~NSH_SI_MASK));
> > +                            (ntohl(encap_nsh->path_hdr) & ~NSH_SI_MASK));
> >              continue;
> 
> Dito.
> 
> > @@ -1861,28 +1866,29 @@ parse_odp_encap_nsh_action(const char *s,
> > struct ofpbuf *actions)
> >                  continue;
> >              }
> >          }
> > -        else if (encap_nsh.mdtype == NSH_M_TYPE2) {
> > +        else if (encap_nsh->mdtype == NSH_M_TYPE2) {
> >              struct ofpbuf b;
> >              char buf[512];
> >              size_t mdlen;
> >              if (ovs_scan_len(s, &n, "md2=0x%511[0-9a-fA-F]", buf)) {
> > -                ofpbuf_use_stub(&b, encap_nsh.metadata,
> > -                                OVS_ENCAP_NSH_MAX_MD_LEN);
> > +                ofpbuf_use_stub(&b, encap_nsh->metadata,
> > +                                NSH_CTX_HDRS_MAX_LEN);
> >                  ofpbuf_put_hex(&b, buf, &mdlen);
> 
> Here it might be necessary to pad the metadata buffer with zeros to 4 bytes.

This is parsing the data netlink message put, md2 hex string has been
formated which has followed metadata format, so I don't think this is
necessary, in addition, pad will be helpless because md2 hex string has
set tlv len, we can't change it because of such padding.

Actucally lib/ofp-actions.c has handled this, all the data is from
lib/ofp-actions.c, so we mustn't do duplicate and helpless thing here.

> 
> > @@ -6798,19 +6774,20 @@ odp_put_encap_nsh_action(struct ofpbuf
> > *odp_actions,
> [...]
> > -    switch (encap_nsh.mdtype) {
> > +    switch (encap_nsh->mdtype) {
> >      case NSH_M_TYPE1: {
> >          struct nsh_md1_ctx *md1 =
> > -            ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh.metadata);
> > -        encap_nsh.mdlen = NSH_M_TYPE1_MDLEN;
> > +            ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh->metadata);
> > +        encap_nsh->mdlen = NSH_M_TYPE1_MDLEN;
> >          for (int i = 0; i < 4; i++) {
> >              put_16aligned_be32(&md1->c[i], flow->nsh.c[i]);
> 
> No need for put_16aligned_be32() here as the encap_nsh action and the metadata included are 8-byte aligned.

I used memcpy(md1, flow->nsh.c, sizeof(*md1)) to do this in new version.

> 
> > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> > index 9e1f837..1b99ad5 100644
> > --- a/ofproto/ofproto-dpif-xlate.c
> > +++ b/ofproto/ofproto-dpif-xlate.c
> > @@ -5885,10 +5885,10 @@ rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
> >      /* Populate the flow with the new NSH header. */
> >      flow->packet_type = htonl(PT_NSH);
> >      flow->dl_type = htons(ETH_TYPE_NSH);
> > -    flow->nsh.flags = 0;    /* */
> > +    flow->nsh.flags = 0;
> > +    flow->nsh.ttl = 63;
> >      flow->nsh.np = np;
> > -    flow->nsh.spi = 0;
> > -    flow->nsh.si = 255;
> > +    flow->nsh.path_hdr = htonl(255);
> 
> Use nsh setter functions instead?

Done in new version.

> 
> > @@ -5901,6 +5901,7 @@ rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
> >      } else if (md_type == NSH_M_TYPE2) {
> >          flow->nsh.mdtype = NSH_M_TYPE2;
> >      }
> > +    flow->nsh.mdtype &= NSH_MDTYPE_MASK;
> 
> Not really needed here, as we set the mdtype to a defined value.

Removed it in new version.

> 
> I didn't check the unit test adaptations to the introduction of ttl field.

I have changed nsh unit tests for ttl and dec_nsh_ttl, they have been
verified by nsh unit tests.

I'll send out new version after Ben gives me feedbacks.

> 
> BR, Jan
Ben Pfaff Aug. 29, 2017, 3:44 p.m. UTC | #3
On Tue, Aug 29, 2017 at 02:30:35PM +0800, Yang, Yi wrote:
> I'll send out new version after Ben gives me feedbacks.

I'd like to see v5.  I'm hoping to just go ahead and apply it.
Jan Scheurich Aug. 29, 2017, 5:02 p.m. UTC | #4
Hi Yi,

Some more comments below.

/Jan

> -----Original Message-----
> From: Yang, Yi [mailto:yi.y.yang@intel.com]
> Sent: Tuesday, 29 August, 2017 08:31
> To: Jan Scheurich <jan.scheurich@ericsson.com>
> Cc: dev@openvswitch.org; blp@ovn.org
> Subject: Re: [PATCH v4 2/3] nsh: add new flow key 'ttl'
> 
> 
> > > diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
> > > index a658a58..cd61fff 100644
> > > --- a/include/openvswitch/flow.h
> > > +++ b/include/openvswitch/flow.h
> > > @@ -146,7 +146,7 @@ struct flow {
> > >      struct eth_addr arp_tha;    /* ARP/ND target hardware address. */
> > >      ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4.
> */
> > >      ovs_be16 pad2;              /* Pad to 64 bits. */
> > > -    struct flow_nsh nsh;        /* Network Service Header keys */
> > > +    struct ovs_key_nsh nsh;     /* Network Service Header keys */
> >
> > Why rename this type to struct ovs_key_nsh?
> 
> I have explained it in previous review, new ttl field results in it isn't 64 bit
> aligned, moreover, we musnt't do some unnecessary conversion if we use
> struct ovs_key_nsh, netlink also used struct ovs_key_nsh, it is very
> reasonable to use struct ovs_key_nsh for this, why don't we use it?

Not arguing alignment here. That resulted in merging spi and si into path_hdr
and is OK.

All structs ovs_key_xxx are defined in datapath/linux/.../openvswitch.h, while all 
structs flow_xxx are defined in lib/packets.h. You have defined struct 
ovs_key_nsh in lib/packets.h and refer to it both in struct flow and in the 
netlink attribute. This violates the standard practices. Even if they are
isomorphic, I think we should have independent type definitions and a (trivial)
conversion function.

> >
> > > +static inline void
> > > +__nsh_set_xflag(struct nsh_hdr *nsh, uint16_t xflag, uint16_t
> > > +xmask) {
> > > +    nsh->ver_flags_ttl_len
> > > +        = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
> > > +}
> >
> > Avoid __* prefix. Better call it nsh_set_base_hdr_masked().
> 
> We just use __ to mark it as an internal function which will not be used
> outside of nsh.h, this is C coding convention.

The use of __* prefix is reserved for the C compiler. In OVS we typically 
use *__ postfix for internal functions.

> > > +static inline void nsh_set_flags_and_ttl(struct nsh_hdr *nsh, uint8_t
> flags,
> > > +                                         uint8_t ttl) {
> > > +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> > > NSH_FLAGS_MASK) |
> > > +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
> > > +                    NSH_FLAGS_MASK | NSH_TTL_MASK); }
> > > +
> > > +static inline void nsh_set_flags_ttl_len(struct nsh_hdr *nsh, uint8_t
> flags,
> > > +                                         uint8_t ttl, uint8_t len)
> > > +{
> > > +    len = len >> 2;
> > > +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> > > NSH_FLAGS_MASK) |
> > > +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
> > > +                         ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
> > > +                    NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
> > > +}
> >
> > Do you really need to these hybrid functions? Why not use modular
> functions nsh_set_flags(), nsh_set_ttl() and nsh_set_len()? I believe the run-
> time overhead is negligible with an optimizing compiler.
> 
> Basically these three functions will be executed together, so one is better.

They are set in different combinations. I'd say it is better not have a dedicated
setter function for every required combination. Only a significant run-time 
overhead might tempt one to introduce collapsed functions.

> > For symmetry you should also add a function setter function for path_hdr:
> 
> I think put_16aligned_be32(&nsh->path_hdr, path_hdr) is better than
> nsh_set_path_hdr, here isn't C++ programming :-)

I don't agree. You have nsh_get_path_hdr(), so why are set and get handled 
differently? The main point of these functions is to shield the user from the
nasty alignment details of struct nsh_hdr when accessing the packet headers.


> >
> > Why did you rename this struct? As part of the struct flow, it was
> > more appropriately called struct flow_nsh.
> 
> It isn't name issue, we want to use common struct ovs_key_nsh in multiple
> function modules.

Please see above. Struct ovs_key_nsh should be defined in 
datapath/linux/.../openvswitch.h to be used on the netlink interface. 
struct flow_nsh should be defined here in lib/packets.h and used in struct flow.

> > > diff --git a/lib/nx-match.c b/lib/nx-match.c index b782e8c..dc52566
> > > 100644
> > > --- a/lib/nx-match.c
> > > +++ b/lib/nx-match.c
> > > @@ -1157,13 +1158,22 @@ nx_put_raw(struct ofpbuf *b, enum
> > > ofp_version oxm, const struct match *match,
> > >      /* Network Service Header */
> > >      nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags,
> > >              match->wc.masks.nsh.flags);
> > > +    nxm_put_8m(&ctx, MFF_NSH_TTL, oxm, flow->nsh.ttl,
> > > +            match->wc.masks.nsh.ttl);
> > >      nxm_put_8m(&ctx, MFF_NSH_MDTYPE, oxm, flow->nsh.mdtype,
> > >              match->wc.masks.nsh.mdtype);
> > >      nxm_put_8m(&ctx, MFF_NSH_NP, oxm, flow->nsh.np,
> > >              match->wc.masks.nsh.np);
> > > -    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm, flow->nsh.spi,
> > > -                match->wc.masks.nsh.spi);
> > > -    nxm_put_8m(&ctx, MFF_NSH_SI, oxm, flow->nsh.si, match-
> > > >wc.masks.nsh.si);
> > > +    spi_mask = nsh_path_hdr_to_spi(match->wc.masks.nsh.path_hdr);
> > > +    if (spi_mask == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> > > +        spi_mask = OVS_BE32_MAX;
> > > +    }
> > > +    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm,
> > > +                nsh_path_hdr_to_spi(flow->nsh.path_hdr),
> > > +                spi_mask);
> >
> > I think it is not correct to expand a 24 bit all-ones mask for the SPI field to
> a full 32-bit all-ones mask. Compare to the MFF_IPV6_LABEL case which is
> defined as a 20-bit field in a 4-byte OXM. The codes just puts the mask as
> is.
> 
> MFF_IPV6_LABEL is different from this, MPLS_LABEL is same as this one.
> But it will return BAD_MASK error if we don't expand it ot OVS_BE32_MAX
> because mf_is_mask_valid expects mask is all one, the actual mask is
> 0x00FFFFFF. I have tried this many times, please do tell me a better
> solution for this if you have a better one.

MFF_MPLS_LABEL is not comparable because the label is not maskable.
MFF_IPV6_LABEL, in contrast, is a maskable 20 bit field in a 32 bit OXM 
container. And this is about including the mask in the in the OXM.

But I double checked the MFF_IPV6_LABEL code. It indeed sets the internal 
Mask to OVS_BE32_MAX for an exact match of the IPv6 label field. 
That's why we can use the generic nxm_put_32_m() function to encode the 
MFF_IPV6_LABEL OXM into the OpenFlow match. That function internally 
calls is_all_ones(mask, 4) to check for an exact match.

We need to do the same for NSH SPI. So your code was doing the right thing.

> > > diff --git a/lib/odp-util.c b/lib/odp-util.c index 4f1499e..ac286ea
> > > 100644
> > > --- a/lib/odp-util.c
> > > +++ b/lib/odp-util.c
> > > @@ -319,17 +320,16 @@ format_nsh_key_mask(struct ds *ds, const
> > > struct ovs_key_nsh *key,
> > >          format_nsh_key(ds, key);
> > >      } else {
> > >          bool first = true;
> > > -        uint32_t spi = (ntohl(key->path_hdr) & NSH_SPI_MASK) >>
> > > NSH_SPI_SHIFT;
> > > -        uint32_t spi_mask = (ntohl(mask->path_hdr) & NSH_SPI_MASK)
> >>
> > > -                                NSH_SPI_SHIFT;
> > > -        if (spi_mask == 0x00ffffff) {
> > > +        uint32_t spi = nsh_path_hdr_to_spi_uint32(key->path_hdr);
> > > +        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask-
> >path_hdr);
> > > +        if (spi_mask == (NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> > >              spi_mask = UINT32_MAX;
> > >          }
> >
> > I understand that you expand the mask here to 32 bits to be able to
> > use format_be32_masked helper below, but it is confusing anyhow.
> Perhaps it would be clearer to introduce a dedicated formatting function
> for SPI.
> 
> But a dedicated formatting function for SPI will have the same code as
> format_be32_masked does, so we mustn't duplicate code.

OK, see also my conclusion on mask expansion above.

> > > @@ -1861,28 +1866,29 @@ parse_odp_encap_nsh_action(const char
> *s,
> > > struct ofpbuf *actions)
> > >                  continue;
> > >              }
> > >          }
> > > -        else if (encap_nsh.mdtype == NSH_M_TYPE2) {
> > > +        else if (encap_nsh->mdtype == NSH_M_TYPE2) {
> > >              struct ofpbuf b;
> > >              char buf[512];
> > >              size_t mdlen;
> > >              if (ovs_scan_len(s, &n, "md2=0x%511[0-9a-fA-F]", buf)) {
> > > -                ofpbuf_use_stub(&b, encap_nsh.metadata,
> > > -                                OVS_ENCAP_NSH_MAX_MD_LEN);
> > > +                ofpbuf_use_stub(&b, encap_nsh->metadata,
> > > +                                NSH_CTX_HDRS_MAX_LEN);
> > >                  ofpbuf_put_hex(&b, buf, &mdlen);
> >
> > Here it might be necessary to pad the metadata buffer with zeros to 4
> bytes.
> 
> This is parsing the data netlink message put, md2 hex string has been
> formated which has followed metadata format, so I don't think this is
> necessary, in addition, pad will be helpless because md2 hex string has set
> tlv len, we can't change it because of such padding.

This code is parsing the datapath action in an ovs-dpctl add-flow command.
We cannot rely on that the hex string in the command is 4-byte aligned. If it
is not, we must make sure that the buffer for the MD2 TLV is padded with 
zeros to 4 byte boundary, even though the TLV length field may be a non-
integer multiple of 4.

> 
> Actucally lib/ofp-actions.c has handled this, all the data is from lib/ofp-
> actions.c, so we mustn't do duplicate and helpless thing here.

The code in lib/ofp-actions.c parses the OpenFlow generic encap() action in
ovs-ofctl commands. This is completely independent.

> 
> >
> > I didn't check the unit test adaptations to the introduction of ttl field.
> 
> I have changed nsh unit tests for ttl and dec_nsh_ttl, they have been
> verified by nsh unit tests.
> 
> I'll send out new version after Ben gives me feedbacks.
> 
> >
> > BR, Jan
Yang, Yi Aug. 29, 2017, 5:29 p.m. UTC | #5
On Wed, Aug 30, 2017 at 01:02:07AM +0800, Jan Scheurich wrote:
> Hi Yi,
> 
> Some more comments below.
> 
> /Jan
> 
> > -----Original Message-----
> > From: Yang, Yi [mailto:yi.y.yang@intel.com]
> > Sent: Tuesday, 29 August, 2017 08:31
> > To: Jan Scheurich <jan.scheurich@ericsson.com>
> > Cc: dev@openvswitch.org; blp@ovn.org
> > Subject: Re: [PATCH v4 2/3] nsh: add new flow key 'ttl'
> > 
> > 
> > > > diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
> > > > index a658a58..cd61fff 100644
> > > > --- a/include/openvswitch/flow.h
> > > > +++ b/include/openvswitch/flow.h
> > > > @@ -146,7 +146,7 @@ struct flow {
> > > >      struct eth_addr arp_tha;    /* ARP/ND target hardware address. */
> > > >      ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4.
> > */
> > > >      ovs_be16 pad2;              /* Pad to 64 bits. */
> > > > -    struct flow_nsh nsh;        /* Network Service Header keys */
> > > > +    struct ovs_key_nsh nsh;     /* Network Service Header keys */
> > >
> > > Why rename this type to struct ovs_key_nsh?
> > 
> > I have explained it in previous review, new ttl field results in it isn't 64 bit
> > aligned, moreover, we musnt't do some unnecessary conversion if we use
> > struct ovs_key_nsh, netlink also used struct ovs_key_nsh, it is very
> > reasonable to use struct ovs_key_nsh for this, why don't we use it?
> 
> Not arguing alignment here. That resulted in merging spi and si into path_hdr
> and is OK.
> 
> All structs ovs_key_xxx are defined in datapath/linux/.../openvswitch.h, while all 
> structs flow_xxx are defined in lib/packets.h. You have defined struct 
> ovs_key_nsh in lib/packets.h and refer to it both in struct flow and in the 
> netlink attribute. This violates the standard practices. Even if they are
> isomorphic, I think we should have independent type definitions and a (trivial)
> conversion function.

Jan, this is just a way we can aviod breaking kernel uAPI, I also
defined struct ovs_key_nsh in net/openvswitch/flow.h not in
datapath/linux/.../openvswitch.h in kernel datapath patch.

> 
> > >
> > > > +static inline void
> > > > +__nsh_set_xflag(struct nsh_hdr *nsh, uint16_t xflag, uint16_t
> > > > +xmask) {
> > > > +    nsh->ver_flags_ttl_len
> > > > +        = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
> > > > +}
> > >
> > > Avoid __* prefix. Better call it nsh_set_base_hdr_masked().
> > 
> > We just use __ to mark it as an internal function which will not be used
> > outside of nsh.h, this is C coding convention.
> 
> The use of __* prefix is reserved for the C compiler. In OVS we typically 
> use *__ postfix for internal functions.

Ok, let me move __ as suffix.

> 
> > > > +static inline void nsh_set_flags_and_ttl(struct nsh_hdr *nsh, uint8_t
> > flags,
> > > > +                                         uint8_t ttl) {
> > > > +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> > > > NSH_FLAGS_MASK) |
> > > > +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
> > > > +                    NSH_FLAGS_MASK | NSH_TTL_MASK); }
> > > > +
> > > > +static inline void nsh_set_flags_ttl_len(struct nsh_hdr *nsh, uint8_t
> > flags,
> > > > +                                         uint8_t ttl, uint8_t len)
> > > > +{
> > > > +    len = len >> 2;
> > > > +    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) &
> > > > NSH_FLAGS_MASK) |
> > > > +                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
> > > > +                         ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
> > > > +                    NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
> > > > +}
> > >
> > > Do you really need to these hybrid functions? Why not use modular
> > functions nsh_set_flags(), nsh_set_ttl() and nsh_set_len()? I believe the run-
> > time overhead is negligible with an optimizing compiler.
> > 
> > Basically these three functions will be executed together, so one is better.
> 
> They are set in different combinations. I'd say it is better not have a dedicated
> setter function for every required combination. Only a significant run-time 
> overhead might tempt one to introduce collapsed functions.
> 
> > > For symmetry you should also add a function setter function for path_hdr:
> > 
> > I think put_16aligned_be32(&nsh->path_hdr, path_hdr) is better than
> > nsh_set_path_hdr, here isn't C++ programming :-)
> 
> I don't agree. You have nsh_get_path_hdr(), so why are set and get handled 
> differently? The main point of these functions is to shield the user from the
> nasty alignment details of struct nsh_hdr when accessing the packet headers.

I want to remove nsh_get_path_hdr, I don't want nsh.h to depend on
other stuff. put_16aligned_be32 needs to be defined in nsh.h for this.

> 
> 
> > >
> > > Why did you rename this struct? As part of the struct flow, it was
> > > more appropriately called struct flow_nsh.
> > 
> > It isn't name issue, we want to use common struct ovs_key_nsh in multiple
> > function modules.
> 
> Please see above. Struct ovs_key_nsh should be defined in 
> datapath/linux/.../openvswitch.h to be used on the netlink interface. 
> struct flow_nsh should be defined here in lib/packets.h and used in struct flow.

Please see my comments in previous section.

> 
> > > > diff --git a/lib/nx-match.c b/lib/nx-match.c index b782e8c..dc52566
> > > > 100644
> > > > --- a/lib/nx-match.c
> > > > +++ b/lib/nx-match.c
> > > > @@ -1157,13 +1158,22 @@ nx_put_raw(struct ofpbuf *b, enum
> > > > ofp_version oxm, const struct match *match,
> > > >      /* Network Service Header */
> > > >      nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags,
> > > >              match->wc.masks.nsh.flags);
> > > > +    nxm_put_8m(&ctx, MFF_NSH_TTL, oxm, flow->nsh.ttl,
> > > > +            match->wc.masks.nsh.ttl);
> > > >      nxm_put_8m(&ctx, MFF_NSH_MDTYPE, oxm, flow->nsh.mdtype,
> > > >              match->wc.masks.nsh.mdtype);
> > > >      nxm_put_8m(&ctx, MFF_NSH_NP, oxm, flow->nsh.np,
> > > >              match->wc.masks.nsh.np);
> > > > -    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm, flow->nsh.spi,
> > > > -                match->wc.masks.nsh.spi);
> > > > -    nxm_put_8m(&ctx, MFF_NSH_SI, oxm, flow->nsh.si, match-
> > > > >wc.masks.nsh.si);
> > > > +    spi_mask = nsh_path_hdr_to_spi(match->wc.masks.nsh.path_hdr);
> > > > +    if (spi_mask == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> > > > +        spi_mask = OVS_BE32_MAX;
> > > > +    }
> > > > +    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm,
> > > > +                nsh_path_hdr_to_spi(flow->nsh.path_hdr),
> > > > +                spi_mask);
> > >
> > > I think it is not correct to expand a 24 bit all-ones mask for the SPI field to
> > a full 32-bit all-ones mask. Compare to the MFF_IPV6_LABEL case which is
> > defined as a 20-bit field in a 4-byte OXM. The codes just puts the mask as
> > is.
> > 
> > MFF_IPV6_LABEL is different from this, MPLS_LABEL is same as this one.
> > But it will return BAD_MASK error if we don't expand it ot OVS_BE32_MAX
> > because mf_is_mask_valid expects mask is all one, the actual mask is
> > 0x00FFFFFF. I have tried this many times, please do tell me a better
> > solution for this if you have a better one.
> 
> MFF_MPLS_LABEL is not comparable because the label is not maskable.
> MFF_IPV6_LABEL, in contrast, is a maskable 20 bit field in a 32 bit OXM 
> container. And this is about including the mask in the in the OXM.
> 
> But I double checked the MFF_IPV6_LABEL code. It indeed sets the internal 
> Mask to OVS_BE32_MAX for an exact match of the IPv6 label field. 
> That's why we can use the generic nxm_put_32_m() function to encode the 
> MFF_IPV6_LABEL OXM into the OpenFlow match. That function internally 
> calls is_all_ones(mask, 4) to check for an exact match.
> 
> We need to do the same for NSH SPI. So your code was doing the right thing.

But spi isn't maskable. For MFF_MPLS_LABEL, it used nxm_put_32 not
nxm_put_32m, so it didn't have this issue.

    /* "nsh_spi" (aka "nsp").
     *
     * spi (service path identifier) field in NSH base header.
     *
     * Type: be32 (low 24 bits).
     * Maskable: no.
     * Formatting: hexadecimal.
     * Prerequisites: NSH.
     * Access: read/write.
     * NXM: none.
     * OXM: NXOXM_NSH_SPI(5) since OF1.3 and v2.8.
     */

> 
> > > > diff --git a/lib/odp-util.c b/lib/odp-util.c index 4f1499e..ac286ea
> > > > 100644
> > > > --- a/lib/odp-util.c
> > > > +++ b/lib/odp-util.c
> > > > @@ -319,17 +320,16 @@ format_nsh_key_mask(struct ds *ds, const
> > > > struct ovs_key_nsh *key,
> > > >          format_nsh_key(ds, key);
> > > >      } else {
> > > >          bool first = true;
> > > > -        uint32_t spi = (ntohl(key->path_hdr) & NSH_SPI_MASK) >>
> > > > NSH_SPI_SHIFT;
> > > > -        uint32_t spi_mask = (ntohl(mask->path_hdr) & NSH_SPI_MASK)
> > >>
> > > > -                                NSH_SPI_SHIFT;
> > > > -        if (spi_mask == 0x00ffffff) {
> > > > +        uint32_t spi = nsh_path_hdr_to_spi_uint32(key->path_hdr);
> > > > +        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask-
> > >path_hdr);
> > > > +        if (spi_mask == (NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
> > > >              spi_mask = UINT32_MAX;
> > > >          }
> > >
> > > I understand that you expand the mask here to 32 bits to be able to
> > > use format_be32_masked helper below, but it is confusing anyhow.
> > Perhaps it would be clearer to introduce a dedicated formatting function
> > for SPI.
> > 
> > But a dedicated formatting function for SPI will have the same code as
> > format_be32_masked does, so we mustn't duplicate code.
> 
> OK, see also my conclusion on mask expansion above.
> 
> > > > @@ -1861,28 +1866,29 @@ parse_odp_encap_nsh_action(const char
> > *s,
> > > > struct ofpbuf *actions)
> > > >                  continue;
> > > >              }
> > > >          }
> > > > -        else if (encap_nsh.mdtype == NSH_M_TYPE2) {
> > > > +        else if (encap_nsh->mdtype == NSH_M_TYPE2) {
> > > >              struct ofpbuf b;
> > > >              char buf[512];
> > > >              size_t mdlen;
> > > >              if (ovs_scan_len(s, &n, "md2=0x%511[0-9a-fA-F]", buf)) {
> > > > -                ofpbuf_use_stub(&b, encap_nsh.metadata,
> > > > -                                OVS_ENCAP_NSH_MAX_MD_LEN);
> > > > +                ofpbuf_use_stub(&b, encap_nsh->metadata,
> > > > +                                NSH_CTX_HDRS_MAX_LEN);
> > > >                  ofpbuf_put_hex(&b, buf, &mdlen);
> > >
> > > Here it might be necessary to pad the metadata buffer with zeros to 4
> > bytes.
> > 
> > This is parsing the data netlink message put, md2 hex string has been
> > formated which has followed metadata format, so I don't think this is
> > necessary, in addition, pad will be helpless because md2 hex string has set
> > tlv len, we can't change it because of such padding.
> 
> This code is parsing the datapath action in an ovs-dpctl add-flow command.
> We cannot rely on that the hex string in the command is 4-byte aligned. If it
> is not, we must make sure that the buffer for the MD2 TLV is padded with 
> zeros to 4 byte boundary, even though the TLV length field may be a non-
> integer multiple of 4.

Will add padding for 4 bytes alignment in new version.

> 
> > 
> > Actucally lib/ofp-actions.c has handled this, all the data is from lib/ofp-
> > actions.c, so we mustn't do duplicate and helpless thing here.
> 
> The code in lib/ofp-actions.c parses the OpenFlow generic encap() action in
> ovs-ofctl commands. This is completely independent.
> 
> > 
> > >
> > > I didn't check the unit test adaptations to the introduction of ttl field.
> > 
> > I have changed nsh unit tests for ttl and dec_nsh_ttl, they have been
> > verified by nsh unit tests.
> > 
> > I'll send out new version after Ben gives me feedbacks.
> > 
> > >
> > > BR, Jan
diff mbox

Patch

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index bc6c94b..32804db 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -492,15 +492,6 @@  struct ovs_key_ct_labels {
 	};
 };
 
-struct ovs_key_nsh {
-    __u8 flags;
-    __u8 mdtype;
-    __u8 np;
-    __u8 pad;
-    __be32 path_hdr;
-    __be32 c[4];
-};
-
 /* OVS_KEY_ATTR_CT_STATE flags */
 #define OVS_CS_F_NEW               0x01 /* Beginning of a new connection. */
 #define OVS_CS_F_ESTABLISHED       0x02 /* Part of an existing connection. */
@@ -793,10 +784,10 @@  struct ovs_action_push_eth {
 	struct ovs_key_ethernet addresses;
 };
 
-#define OVS_ENCAP_NSH_MAX_MD_LEN 16
 /*
  * struct ovs_action_encap_nsh - %OVS_ACTION_ATTR_ENCAP_NSH
  * @flags: NSH header flags.
+ * @ttl: NSH header TTL.
  * @mdtype: NSH metadata type.
  * @mdlen: Length of NSH metadata in bytes.
  * @np: NSH next_protocol: Inner packet type.
@@ -805,11 +796,13 @@  struct ovs_action_push_eth {
  */
 struct ovs_action_encap_nsh {
     uint8_t flags;
+    uint8_t ttl;
     uint8_t mdtype;
-    uint8_t mdlen;
     uint8_t np;
     __be32 path_hdr;
-    uint8_t metadata[OVS_ENCAP_NSH_MAX_MD_LEN];
+    uint8_t mdlen;
+    uint8_t pad[7]; /* Aligned to 64 bit boundary for metadata */
+    uint8_t metadata[];
 };
 
 /**
diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
index a658a58..cd61fff 100644
--- a/include/openvswitch/flow.h
+++ b/include/openvswitch/flow.h
@@ -146,7 +146,7 @@  struct flow {
     struct eth_addr arp_tha;    /* ARP/ND target hardware address. */
     ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4. */
     ovs_be16 pad2;              /* Pad to 64 bits. */
-    struct flow_nsh nsh;        /* Network Service Header keys */
+    struct ovs_key_nsh nsh;     /* Network Service Header keys */
 
     /* L4 (64-bit aligned) */
     ovs_be16 tp_src;            /* TCP/UDP/SCTP source port/ICMP type. */
@@ -159,13 +159,13 @@  struct flow {
 };
 BUILD_ASSERT_DECL(sizeof(struct flow) % sizeof(uint64_t) == 0);
 BUILD_ASSERT_DECL(sizeof(struct flow_tnl) % sizeof(uint64_t) == 0);
-BUILD_ASSERT_DECL(sizeof(struct flow_nsh) % sizeof(uint64_t) == 0);
+BUILD_ASSERT_DECL(sizeof(struct ovs_key_nsh) % sizeof(uint64_t) == 0);
 
 #define FLOW_U64S (sizeof(struct flow) / sizeof(uint64_t))
 
 /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
 BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
-                  == sizeof(struct flow_tnl) + sizeof(struct flow_nsh) + 300
+                  == sizeof(struct flow_tnl) + sizeof(struct ovs_key_nsh) + 300
                   && FLOW_WC_SEQ == 40);
 
 /* Incremental points at which flow classification may be performed in
diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index 436501f..14e6b59 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -1757,6 +1757,21 @@  enum OVS_PACKED_ENUM mf_field_id {
      */
     MFF_NSH_FLAGS,
 
+    /* "nsh_ttl".
+     *
+     * TTL field in NSH base header.
+     *
+     * Type: u8.
+     * Maskable: no.
+     * Formatting: decimal.
+     * Prerequisites: NSH.
+     * Access: read/write.
+     * NXM: none.
+     * OXM: NXOXM_NSH_TTL(2) since OF1.3 and v2.8.
+     */
+    MFF_NSH_TTL,
+
+
     /* "nsh_mdtype".
      *
      * mdtype field in NSH base header.
@@ -1767,7 +1782,7 @@  enum OVS_PACKED_ENUM mf_field_id {
      * Prerequisites: NSH.
      * Access: read-only.
      * NXM: none.
-     * OXM: NXOXM_NSH_MDTYPE(2) since OF1.3 and v2.8.
+     * OXM: NXOXM_NSH_MDTYPE(3) since OF1.3 and v2.8.
      */
     MFF_NSH_MDTYPE,
 
@@ -1781,7 +1796,7 @@  enum OVS_PACKED_ENUM mf_field_id {
      * Prerequisites: NSH.
      * Access: read-only.
      * NXM: none.
-     * OXM: NXOXM_NSH_NP(3) since OF1.3 and v2.8.
+     * OXM: NXOXM_NSH_NP(4) since OF1.3 and v2.8.
      */
     MFF_NSH_NP,
 
@@ -1795,7 +1810,7 @@  enum OVS_PACKED_ENUM mf_field_id {
      * Prerequisites: NSH.
      * Access: read/write.
      * NXM: none.
-     * OXM: NXOXM_NSH_SPI(4) since OF1.3 and v2.8.
+     * OXM: NXOXM_NSH_SPI(5) since OF1.3 and v2.8.
      */
     MFF_NSH_SPI,
 
@@ -1809,7 +1824,7 @@  enum OVS_PACKED_ENUM mf_field_id {
      * Prerequisites: NSH.
      * Access: read/write.
      * NXM: none.
-     * OXM: NXOXM_NSH_SI(5) since OF1.3 and v2.8.
+     * OXM: NXOXM_NSH_SI(6) since OF1.3 and v2.8.
      */
     MFF_NSH_SI,
 
@@ -1823,10 +1838,10 @@  enum OVS_PACKED_ENUM mf_field_id {
      * Prerequisites: NSH.
      * Access: read/write.
      * NXM: none.
-     * OXM: NXOXM_NSH_C1(6) since OF1.3 and v2.8.        <1>
-     * OXM: NXOXM_NSH_C2(7) since OF1.3 and v2.8.        <2>
-     * OXM: NXOXM_NSH_C3(8) since OF1.3 and v2.8.        <3>
-     * OXM: NXOXM_NSH_C4(9) since OF1.3 and v2.8.        <4>
+     * OXM: NXOXM_NSH_C1(7) since OF1.3 and v2.8.        <1>
+     * OXM: NXOXM_NSH_C2(8) since OF1.3 and v2.8.        <2>
+     * OXM: NXOXM_NSH_C3(9) since OF1.3 and v2.8.        <3>
+     * OXM: NXOXM_NSH_C4(10) since OF1.3 and v2.8.        <4>
      */
     MFF_NSH_C1,
     MFF_NSH_C2,
diff --git a/include/openvswitch/nsh.h b/include/openvswitch/nsh.h
index a3611d0..0231f68 100644
--- a/include/openvswitch/nsh.h
+++ b/include/openvswitch/nsh.h
@@ -5,41 +5,189 @@ 
 
 /*
  * Network Service Header:
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |Ver|O|C|R|R|R|R|R|R|    Length   |   MD Type   |  Next Proto   |
+ * |Ver|O|U|    TTL    |   Length  |U|U|U|U|MD Type| Next Protocol |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |                Service Path ID                | Service Index |
+ * |          Service Path Identifier (SPI)        | Service Index |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                                                               |
- * ~               Mandatory/Optional Context Header               ~
+ * ~               Mandatory/Optional Context Headers              ~
  * |                                                               |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * Ver = The version field is used to ensure backward compatibility
- *       going forward with future NSH updates.  It MUST be set to 0x0
- *       by the sender, in this first revision of NSH.
  *
- * O = OAM. when set to 0x1 indicates that this packet is an operations
- *     and management (OAM) packet.  The receiving SFF and SFs nodes
- *     MUST examine the payload and take appropriate action.
+ * Version: The version field is used to ensure backward compatibility
+ * going forward with future NSH specification updates.  It MUST be set
+ * to 0x0 by the sender, in this first revision of NSH.  Given the
+ * widespread implementation of existing hardware that uses the first
+ * nibble after an MPLS label stack for ECMP decision processing, this
+ * document reserves version 01b and this value MUST NOT be used in
+ * future versions of the protocol.  Please see [RFC7325] for further
+ * discussion of MPLS-related forwarding requirements.
  *
- * C = context. Indicates that a critical metadata TLV is present.
+ * O bit: Setting this bit indicates an Operations, Administration, and
+ * Maintenance (OAM) packet.  The actual format and processing of SFC
+ * OAM packets is outside the scope of this specification (see for
+ * example [I-D.ietf-sfc-oam-framework] for one approach).
  *
- * Length : total length, in 4-byte words, of NSH including the Base
- *          Header, the Service Path Header and the optional variable
- *          TLVs.
- * MD Type: indicates the format of NSH beyond the mandatory Base Header
- *          and the Service Path Header.
+ * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM
+ * packets.  The O bit MUST NOT be modified along the SFP.
  *
- * Next Protocol: indicates the protocol type of the original packet. A
- *          new IANA registry will be created for protocol type.
+ * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC
+ * OAM procedures SHOULD discard packets with O bit set, but MAY support
+ * a configurable parameter to enable forwarding received SFC OAM
+ * packets unmodified to the next element in the chain.  Forwarding OAM
+ * packets unmodified by SFC elements that do not support SFC OAM
+ * procedures may be acceptable for a subset of OAM functions, but can
+ * result in unexpected outcomes for others, thus it is recommended to
+ * analyze the impact of forwarding an OAM packet for all OAM functions
+ * prior to enabling this behavior.  The configurable parameter MUST be
+ * disabled by default.
  *
- * Service Path Identifier (SPI): identifies a service path.
- *          Participating nodes MUST use this identifier for Service
- *          Function Path selection.
+ * TTL: Indicates the maximum SFF hops for an SFP.  This field is used
+ * for service plane loop detection.  The initial TTL value SHOULD be
+ * configurable via the control plane; the configured initial value can
+ * be specific to one or more SFPs.  If no initial value is explicitly
+ * provided, the default initial TTL value of 63 MUST be used.  Each SFF
+ * involved in forwarding an NSH packet MUST decrement the TTL value by
+ * 1 prior to NSH forwarding lookup.  Decrementing by 1 from an incoming
+ * value of 0 shall result in a TTL value of 63.  The packet MUST NOT be
+ * forwarded if TTL is, after decrement, 0.
  *
- * Service Index (SI): provides location within the SFP.
+ * All other flag fields, marked U, are unassigned and available for
+ * future use, see Section 11.2.1.  Unassigned bits MUST be set to zero
+ * upon origination, and MUST be ignored and preserved unmodified by
+ * other NSH supporting elements.  Elements which do not understand the
+ * meaning of any of these bits MUST NOT modify their actions based on
+ * those unknown bits.
  *
- * [0] https://tools.ietf.org/html/draft-ietf-sfc-nsh-13
+ * Length: The total length, in 4-byte words, of NSH including the Base
+ * Header, the Service Path Header, the Fixed Length Context Header or
+ * Variable Length Context Header(s).  The length MUST be 0x6 for MD
+ * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to
+ * 0x2.  The length of the NSH header MUST be an integer multiple of 4
+ * bytes, thus variable length metadata is always padded out to a
+ * multiple of 4 bytes.
+ *
+ * MD Type: Indicates the format of NSH beyond the mandatory Base Header
+ * and the Service Path Header.  MD Type defines the format of the
+ * metadata being carried.
+ *
+ * 0x0 - This is a reserved value.  Implementations SHOULD silently
+ * discard packets with MD Type 0x0.
+ *
+ * 0x1 - This indicates that the format of the header includes a fixed
+ * length Context Header (see Figure 4 below).
+ *
+ * 0x2 - This does not mandate any headers beyond the Base Header and
+ * Service Path Header, but may contain optional variable length Context
+ * Header(s).  The semantics of the variable length Context Header(s)
+ * are not defined in this document.  The format of the optional
+ * variable length Context Headers is provided in Section 2.5.1.
+ *
+ * 0xF - This value is reserved for experimentation and testing, as per
+ * [RFC3692].  Implementations not explicitly configured to be part of
+ * an experiment SHOULD silently discard packets with MD Type 0xF.
+ *
+ * Next Protocol: indicates the protocol type of the encapsulated data.
+ * NSH does not alter the inner payload, and the semantics on the inner
+ * protocol remain unchanged due to NSH service function chaining.
+ * Please see the IANA Considerations section below, Section 11.2.5.
+ *
+ * This document defines the following Next Protocol values:
+ *
+ * 0x1: IPv4
+ * 0x2: IPv6
+ * 0x3: Ethernet
+ * 0x4: NSH
+ * 0x5: MPLS
+ * 0xFE: Experiment 1
+ * 0xFF: Experiment 2
+ *
+ * Packets with Next Protocol values not supported SHOULD be silently
+ * dropped by default, although an implementation MAY provide a
+ * configuration parameter to forward them.  Additionally, an
+ * implementation not explicitly configured for a specific experiment
+ * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE
+ * and 0xFF.
+ *
+ * Service Path Identifier (SPI): Identifies a service path.
+ * Participating nodes MUST use this identifier for Service Function
+ * Path selection.  The initial classifier MUST set the appropriate SPI
+ * for a given classification result.
+ *
+ * Service Index (SI): Provides location within the SFP.  The initial
+ * classifier for a given SFP SHOULD set the SI to 255, however the
+ * control plane MAY configure the initial value of SI as appropriate
+ * (i.e., taking into account the length of the service function path).
+ * The Service Index MUST be decremented by a value of 1 by Service
+ * Functions or by SFC Proxy nodes after performing required services
+ * and the new decremented SI value MUST be used in the egress packet's
+ * NSH.  The initial Classifier MUST send the packet to the first SFF in
+ * the identified SFP for forwarding along an SFP.  If re-classification
+ * occurs, and that re-classification results in a new SPI, the
+ * (re)classifier is, in effect, the initial classifier for the
+ * resultant SPI.
+ *
+ * The SI is used in conjunction the with Service Path Identifier for
+ * Service Function Path Selection and for determining the next SFF/SF
+ * in the path.  The SI is also valuable when troubleshooting or
+ * reporting service paths.  Additionally, while the TTL field is the
+ * main mechanism for service plane loop detection, the SI can also be
+ * used for detecting service plane loops.
+ *
+ * When the Base Header specifies MD Type = 0x1, a Fixed Length Context
+ * Header (16-bytes) MUST be present immediately following the Service
+ * Path Header. The value of a Fixed Length Context
+ * Header that carries no metadata MUST be set to zero.
+ *
+ * When the base header specifies MD Type = 0x2, zero or more Variable
+ * Length Context Headers MAY be added, immediately following the
+ * Service Path Header (see Figure 5).  Therefore, Length = 0x2,
+ * indicates that only the Base Header followed by the Service Path
+ * Header are present.  The optional Variable Length Context Headers
+ * MUST be of an integer number of 4-bytes.  The base header Length
+ * field MUST be used to determine the offset to locate the original
+ * packet or frame for SFC nodes that require access to that
+ * information.
+ *
+ * The format of the optional variable length Context Headers
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Metadata Class       |      Type     |U|    Length   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Variable Metadata                        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Metadata Class (MD Class): Defines the scope of the 'Type' field to
+ * provide a hierarchical namespace.  The IANA Considerations
+ * Section 11.2.4 defines how the MD Class values can be allocated to
+ * standards bodies, vendors, and others.
+ *
+ * Type: Indicates the explicit type of metadata being carried.  The
+ * definition of the Type is the responsibility of the MD Class owner.
+ *
+ * Unassigned bit: One unassigned bit is available for future use. This
+ * bit MUST NOT be set, and MUST be ignored on receipt.
+ *
+ * Length: Indicates the length of the variable metadata, in bytes.  In
+ * case the metadata length is not an integer number of 4-byte words,
+ * the sender MUST add pad bytes immediately following the last metadata
+ * byte to extend the metadata to an integer number of 4-byte words.
+ * The receiver MUST round up the length field to the nearest 4-byte
+ * word boundary, to locate and process the next field in the packet.
+ * The receiver MUST access only those bytes in the metadata indicated
+ * by the length field (i.e., actual number of bytes) and MUST ignore
+ * the remaining bytes up to the nearest 4-byte word boundary.  The
+ * Length may be 0 or greater.
+ *
+ * A value of 0 denotes a Context Header without a Variable Metadata
+ * field.
+ *
+ * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/
  */
 
 #ifdef  __cplusplus
@@ -62,7 +210,7 @@  struct nsh_md2_tlv {
 };
 
 struct nsh_hdr {
-    ovs_be16 ver_flags_len;
+    ovs_be16 ver_flags_ttl_len;
     uint8_t md_type;
     uint8_t next_proto;
     ovs_16aligned_be32 path_hdr;
@@ -75,11 +223,16 @@  struct nsh_hdr {
 /* Masking NSH header fields. */
 #define NSH_VER_MASK       0xc000
 #define NSH_VER_SHIFT      14
-#define NSH_FLAGS_MASK     0x3fc0
-#define NSH_FLAGS_SHIFT    6
+#define NSH_FLAGS_MASK     0x3000
+#define NSH_FLAGS_SHIFT    12
+#define NSH_TTL_MASK       0x0fc0
+#define NSH_TTL_SHIFT      6
 #define NSH_LEN_MASK       0x003f
 #define NSH_LEN_SHIFT      0
 
+#define NSH_MDTYPE_MASK    0x0f
+#define NSH_MDTYPE_SHIFT   0
+
 #define NSH_SPI_MASK       0xffffff00
 #define NSH_SPI_SHIFT      8
 #define NSH_SI_MASK        0x000000ff
@@ -110,10 +263,14 @@  struct nsh_hdr {
 /* NSH MD Type 1 header Length. */
 #define NSH_M_TYPE1_LEN   24
 
+/* NSH Context headers Max Length. */
+#define NSH_CTX_HDRS_MAX_LEN 248
+
 static inline uint16_t
 nsh_hdr_len(const struct nsh_hdr *nsh)
 {
-    return ((ntohs(nsh->ver_flags_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT) << 2;
+    return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK)
+                >> NSH_LEN_SHIFT) << 2;
 }
 
 static inline struct nsh_md1_ctx *
@@ -128,6 +285,123 @@  nsh_md2_ctx(struct nsh_hdr *nsh)
     return &nsh->md2;
 }
 
+static inline uint8_t
+nsh_get_ver(const struct nsh_hdr *nsh)
+{
+    return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK) >> NSH_VER_SHIFT;
+}
+
+static inline uint8_t
+nsh_get_len(const struct nsh_hdr *nsh)
+{
+    return (ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK) >> NSH_LEN_SHIFT;
+}
+
+static inline uint8_t
+nsh_get_flags(const struct nsh_hdr *nsh)
+{
+    return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK) >> NSH_FLAGS_SHIFT;
+}
+
+static inline uint8_t
+nsh_get_ttl(const struct nsh_hdr *nsh)
+{
+    return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK) >> NSH_TTL_SHIFT;
+}
+
+static inline ovs_be32
+nsh_16aligned_be32(const ovs_16aligned_be32 *x)
+{
+#ifdef WORDS_BIGENDIAN
+    return ((ovs_be32) x->hi << 16) | x->lo;
+#else
+    return ((ovs_be32) x->lo << 16) | x->hi;
+#endif
+}
+
+static inline ovs_be32
+nsh_get_path_hdr(const struct nsh_hdr *nsh)
+{
+    return nsh_16aligned_be32(&nsh->path_hdr);
+}
+
+static inline ovs_be32
+nsh_get_spi(const struct nsh_hdr *nsh)
+{
+    ovs_be32 path_hdr = ntohl(nsh_16aligned_be32(&nsh->path_hdr));
+    return htonl((path_hdr & NSH_SPI_MASK) >> NSH_SPI_SHIFT);
+}
+
+static inline uint8_t
+nsh_get_si(const struct nsh_hdr *nsh)
+{
+    ovs_be32 path_hdr = ntohl(nsh_16aligned_be32(&nsh->path_hdr));
+    return (path_hdr & NSH_SI_MASK) >> NSH_SI_SHIFT;
+}
+
+static inline ovs_be32
+nsh_path_hdr_to_spi(ovs_be32 path_hdr)
+{
+    return htonl((ntohl(path_hdr) & NSH_SPI_MASK) >> NSH_SPI_SHIFT);
+}
+
+static inline uint32_t
+nsh_path_hdr_to_spi_uint32(ovs_be32 path_hdr)
+{
+    return (ntohl(path_hdr) & NSH_SPI_MASK) >> NSH_SPI_SHIFT;
+}
+
+static inline uint8_t
+nsh_path_hdr_to_si(ovs_be32 path_hdr)
+{
+    return (ntohl(path_hdr) & NSH_SI_MASK) >> NSH_SI_SHIFT;
+}
+
+static inline ovs_be32
+nsh_spi_si_to_path_hdr(uint32_t spi, uint8_t si)
+{
+    return htonl((spi << NSH_SPI_SHIFT) | si);
+}
+
+static inline void
+__nsh_set_xflag(struct nsh_hdr *nsh, uint16_t xflag, uint16_t xmask)
+{
+    nsh->ver_flags_ttl_len
+        = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
+}
+
+static inline void nsh_set_flags_and_ttl(struct nsh_hdr *nsh, uint8_t flags,
+                                         uint8_t ttl)
+{
+    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
+                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
+                    NSH_FLAGS_MASK | NSH_TTL_MASK);
+}
+
+static inline void nsh_set_flags_ttl_len(struct nsh_hdr *nsh, uint8_t flags,
+                                         uint8_t ttl, uint8_t len)
+{
+    len = len >> 2;
+    __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
+                         ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
+                         ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
+                    NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
+}
+
+static inline void
+nsh_path_hdr_set_spi(ovs_be32 *path_hdr, ovs_be32 spi)
+{
+    *path_hdr = htonl((ntohl(*path_hdr) & ~NSH_SPI_MASK) |
+                      ((ntohl(spi) << NSH_SPI_SHIFT) & NSH_SPI_MASK));
+}
+
+static inline void
+nsh_path_hdr_set_si(ovs_be32 *path_hdr, uint8_t si)
+{
+    *path_hdr = htonl((ntohl(*path_hdr) & ~NSH_SI_MASK) |
+                      ((si << NSH_SI_SHIFT) & NSH_SI_MASK));
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/include/openvswitch/packets.h b/include/openvswitch/packets.h
index be91e02..603ec44 100644
--- a/include/openvswitch/packets.h
+++ b/include/openvswitch/packets.h
@@ -73,24 +73,18 @@  union flow_vlan_hdr {
     };
 };
 
-#ifdef __cplusplus
-}
-#endif
-
 /* Network Service Header keys */
-struct flow_nsh {
+struct ovs_key_nsh {
     uint8_t flags;
+    uint8_t ttl;
     uint8_t mdtype;
     uint8_t np;
-    uint8_t si;
-    ovs_be32 spi;
+    ovs_be32 path_hdr;
     ovs_be32 c[4];
 };
 
-/* NSH flags */
-#define FLOW_NSH_F_OAM (1 << 0)
-#define FLOW_NSH_F_CTX (1 << 1)
-
-#define FLOW_NSH_F_MASK ((1 << 2) - 1)
+#ifdef __cplusplus
+}
+#endif
 
 #endif /* packets.h */
diff --git a/lib/flow.c b/lib/flow.c
index b2b10aa..547ff70 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -530,53 +530,52 @@  parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
 }
 
 bool
-parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key)
+parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key)
 {
     const struct nsh_hdr *nsh = (const struct nsh_hdr *) *datap;
-    uint16_t ver_flags_len;
-    uint8_t version, length, flags;
-    uint32_t path_hdr;
+    uint8_t version, length, flags, ttl;
 
-    /* Check if it is long enough for NSH header, doesn't support
-     * MD type 2 yet
-     */
     if (OVS_UNLIKELY(*sizep < NSH_M_TYPE1_LEN)) {
         return false;
     }
 
-    memset(key, 0, sizeof(struct flow_nsh));
+    version = nsh_get_ver(nsh);
+    flags = nsh_get_flags(nsh);
+    ttl = nsh_get_ttl(nsh);
 
-    ver_flags_len = ntohs(nsh->ver_flags_len);
-    version = (ver_flags_len & NSH_VER_MASK) >> NSH_VER_SHIFT;
-    flags = (ver_flags_len & NSH_FLAGS_MASK) >> NSH_FLAGS_SHIFT;
-
-    /* NSH header length is in 4 byte words. */
-    length = ((ver_flags_len & NSH_LEN_MASK) >> NSH_LEN_SHIFT) << 2;
+    length = nsh_hdr_len(nsh);
 
     if (version != 0) {
         return false;
     }
 
-    if (length != NSH_M_TYPE1_LEN) {
-        return false;
+    if (OVS_UNLIKELY(*sizep < length)) {
+         return false;
     }
 
     key->flags = flags;
+    key->ttl = ttl;
     key->mdtype = nsh->md_type;
     key->np = nsh->next_proto;
-
-    path_hdr = ntohl(get_16aligned_be32(&nsh->path_hdr));
-    key->si = (path_hdr & NSH_SI_MASK) >> NSH_SI_SHIFT;
-    key->spi = htonl((path_hdr & NSH_SPI_MASK) >> NSH_SPI_SHIFT);
+    key->path_hdr = get_16aligned_be32(&nsh->path_hdr);
 
     switch (key->mdtype) {
         case NSH_M_TYPE1:
+            if (length != NSH_M_TYPE1_LEN) {
+                return false;
+            }
             for (size_t i = 0; i < 4; i++) {
                 key->c[i] = get_16aligned_be32(&nsh->md1.c[i]);
             }
             break;
         case NSH_M_TYPE2:
-            /* Don't support MD type 2 yet, so return false */
+            /* Don't support MD type 2 metedata parsing yet */
+            if (length < NSH_BASE_HDR_LEN) {
+                return false;
+            }
+
+            memset(key->c, 0, sizeof(key->c));
+            break;
         default:
             return false;
     }
@@ -876,19 +875,12 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_pad_to_64(mf, arp_tha);
             }
         } else if (dl_type == htons(ETH_TYPE_NSH)) {
-            struct flow_nsh nsh;
+            struct ovs_key_nsh nsh;
 
             if (OVS_LIKELY(parse_nsh(&data, &size, &nsh))) {
-                if (nsh.mdtype == NSH_M_TYPE1) {
-                    miniflow_push_words(mf, nsh, &nsh,
-                                        sizeof(struct flow_nsh) /
-                                        sizeof(uint64_t));
-                }
-                else if (nsh.mdtype == NSH_M_TYPE2) {
-                    /* parse_nsh has stopped it from arriving here for
-                     * MD type 2, will add MD type 2 support code here later
-                     */
-                }
+                miniflow_push_words(mf, nsh, &nsh,
+                                    sizeof(struct ovs_key_nsh) /
+                                    sizeof(uint64_t));
             }
         }
         goto out;
@@ -1688,10 +1680,10 @@  flow_wildcards_init_for_packet(struct flow_wildcards *wc,
         return;
     } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
         WC_MASK_FIELD(wc, nsh.flags);
+        WC_MASK_FIELD(wc, nsh.ttl);
         WC_MASK_FIELD(wc, nsh.mdtype);
         WC_MASK_FIELD(wc, nsh.np);
-        WC_MASK_FIELD(wc, nsh.spi);
-        WC_MASK_FIELD(wc, nsh.si);
+        WC_MASK_FIELD(wc, nsh.path_hdr);
         WC_MASK_FIELD(wc, nsh.c);
     } else {
         return; /* Unknown ethertype. */
@@ -1822,10 +1814,10 @@  flow_wc_map(const struct flow *flow, struct flowmap *map)
         FLOWMAP_SET(map, arp_tha);
     } else if (flow->dl_type == htons(ETH_TYPE_NSH)) {
         FLOWMAP_SET(map, nsh.flags);
+        FLOWMAP_SET(map, nsh.ttl);
         FLOWMAP_SET(map, nsh.mdtype);
         FLOWMAP_SET(map, nsh.np);
-        FLOWMAP_SET(map, nsh.spi);
-        FLOWMAP_SET(map, nsh.si);
+        FLOWMAP_SET(map, nsh.path_hdr);
         FLOWMAP_SET(map, nsh.c);
     }
 }
diff --git a/lib/flow.h b/lib/flow.h
index 6ae5a67..42a8426 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -129,7 +129,7 @@  bool flow_compose(struct dp_packet *, const struct flow *, size_t);
 bool parse_ipv6_ext_hdrs(const void **datap, size_t *sizep, uint8_t *nw_proto,
                          uint8_t *nw_frag);
 ovs_be16 parse_dl_type(const struct eth_header *data_, size_t size);
-bool parse_nsh(const void **datap, size_t *sizep, struct flow_nsh *key);
+bool parse_nsh(const void **datap, size_t *sizep, struct ovs_key_nsh *key);
 
 static inline uint64_t
 flow_get_xreg(const struct flow *flow, int idx)
diff --git a/lib/match.c b/lib/match.c
index 36c78eb..f65e661 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -1260,11 +1260,19 @@  format_ct_label_masked(struct ds *s, const ovs_u128 *key, const ovs_u128 *mask)
 static void
 format_nsh_masked(struct ds *s, const struct flow *f, const struct flow *m)
 {
+    ovs_be32 spi_mask = nsh_path_hdr_to_spi(m->nsh.path_hdr);
+    if (spi_mask == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
+        spi_mask = OVS_BE32_MAX;
+    }
     format_uint8_masked(s, "nsh_flags", f->nsh.flags, m->nsh.flags);
+    format_uint8_masked(s, "nsh_ttl", f->nsh.ttl, m->nsh.ttl);
     format_uint8_masked(s, "nsh_mdtype", f->nsh.mdtype, m->nsh.mdtype);
     format_uint8_masked(s, "nsh_np", f->nsh.np, m->nsh.np);
-    format_be32_masked_hex(s, "nsh_spi", f->nsh.spi, m->nsh.spi);
-    format_uint8_masked(s, "nsh_si", f->nsh.si, m->nsh.si);
+
+    format_be32_masked_hex(s, "nsh_spi", nsh_path_hdr_to_spi(f->nsh.path_hdr),
+                           spi_mask);
+    format_uint8_masked(s, "nsh_si", nsh_path_hdr_to_si(f->nsh.path_hdr),
+                        nsh_path_hdr_to_si(m->nsh.path_hdr));
     if (m->nsh.mdtype == UINT8_MAX && f->nsh.mdtype == NSH_M_TYPE1) {
         format_be32_masked_hex(s, "nsh_c1", f->nsh.c[0], m->nsh.c[0]);
         format_be32_masked_hex(s, "nsh_c2", f->nsh.c[1], m->nsh.c[1]);
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 64a8cf1..b50679c 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -40,6 +40,7 @@ 
 #include "openvswitch/ofp-errors.h"
 #include "openvswitch/vlog.h"
 #include "vl-mff-map.h"
+#include "openvswitch/nsh.h"
 
 VLOG_DEFINE_THIS_MODULE(meta_flow);
 
@@ -361,14 +362,16 @@  mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
 
     case MFF_NSH_FLAGS:
         return !wc->masks.nsh.flags;
+    case MFF_NSH_TTL:
+        return !wc->masks.nsh.ttl;
     case MFF_NSH_MDTYPE:
         return !wc->masks.nsh.mdtype;
     case MFF_NSH_NP:
         return !wc->masks.nsh.np;
     case MFF_NSH_SPI:
-        return !wc->masks.nsh.spi;
+        return !(wc->masks.nsh.path_hdr & htonl(NSH_SPI_MASK));
     case MFF_NSH_SI:
-        return !wc->masks.nsh.si;
+        return !(wc->masks.nsh.path_hdr & htonl(NSH_SI_MASK));
     case MFF_NSH_C1:
     case MFF_NSH_C2:
     case MFF_NSH_C3:
@@ -606,6 +609,8 @@  mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
 
     case MFF_NSH_FLAGS:
         return true;
+    case MFF_NSH_TTL:
+        return (value->u8 <= 63);
     case MFF_NSH_MDTYPE:
         return (value->u8 == 1 || value->u8 == 2);
     case MFF_NSH_NP:
@@ -899,6 +904,9 @@  mf_get_value(const struct mf_field *mf, const struct flow *flow,
     case MFF_NSH_FLAGS:
         value->u8 = flow->nsh.flags;
         break;
+    case MFF_NSH_TTL:
+        value->u8 = flow->nsh.ttl;
+        break;
     case MFF_NSH_MDTYPE:
         value->u8 = flow->nsh.mdtype;
         break;
@@ -906,10 +914,13 @@  mf_get_value(const struct mf_field *mf, const struct flow *flow,
         value->u8 = flow->nsh.np;
         break;
     case MFF_NSH_SPI:
-        value->be32 = flow->nsh.spi;
+        value->be32 = nsh_path_hdr_to_spi(flow->nsh.path_hdr);
+        if (value->be32 == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
+            value->be32 = OVS_BE32_MAX;
+        }
         break;
     case MFF_NSH_SI:
-        value->u8 = flow->nsh.si;
+        value->u8 = nsh_path_hdr_to_si(flow->nsh.path_hdr);
         break;
     case MFF_NSH_C1:
     case MFF_NSH_C2:
@@ -1214,6 +1225,9 @@  mf_set_value(const struct mf_field *mf,
     case MFF_NSH_FLAGS:
         MATCH_SET_FIELD_UINT8(match, nsh.flags, value->u8);
         break;
+    case MFF_NSH_TTL:
+        MATCH_SET_FIELD_UINT8(match, nsh.ttl, value->u8);
+        break;
     case MFF_NSH_MDTYPE:
         MATCH_SET_FIELD_UINT8(match, nsh.mdtype, value->u8);
         break;
@@ -1221,10 +1235,12 @@  mf_set_value(const struct mf_field *mf,
         MATCH_SET_FIELD_UINT8(match, nsh.np, value->u8);
         break;
     case MFF_NSH_SPI:
-        MATCH_SET_FIELD_BE32(match, nsh.spi, value->be32);
+        match->wc.masks.nsh.path_hdr |= htonl(NSH_SPI_MASK);
+        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr, value->be32);
         break;
     case MFF_NSH_SI:
-        MATCH_SET_FIELD_UINT8(match, nsh.si, value->u8);
+        match->wc.masks.nsh.path_hdr |= htonl(NSH_SI_MASK);
+        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr, value->u8);
         break;
     case MFF_NSH_C1:
     case MFF_NSH_C2:
@@ -1605,6 +1621,9 @@  mf_set_flow_value(const struct mf_field *mf,
     case MFF_NSH_FLAGS:
         flow->nsh.flags = value->u8;
         break;
+    case MFF_NSH_TTL:
+        flow->nsh.ttl = value->u8;
+        break;
     case MFF_NSH_MDTYPE:
         flow->nsh.mdtype = value->u8;
         break;
@@ -1612,10 +1631,10 @@  mf_set_flow_value(const struct mf_field *mf,
         flow->nsh.np = value->u8;
         break;
     case MFF_NSH_SPI:
-        flow->nsh.spi = value->be32;
+        nsh_path_hdr_set_spi(&flow->nsh.path_hdr, value->be32);
         break;
     case MFF_NSH_SI:
-        flow->nsh.si = value->u8;
+        nsh_path_hdr_set_si(&flow->nsh.path_hdr, value->u8);
         break;
     case MFF_NSH_C1:
     case MFF_NSH_C2:
@@ -1751,6 +1770,7 @@  mf_is_pipeline_field(const struct mf_field *mf)
     case MFF_ND_SLL:
     case MFF_ND_TLL:
     case MFF_NSH_FLAGS:
+    case MFF_NSH_TTL:
     case MFF_NSH_MDTYPE:
     case MFF_NSH_NP:
     case MFF_NSH_SPI:
@@ -2096,6 +2116,9 @@  mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
     case MFF_NSH_FLAGS:
         MATCH_SET_FIELD_MASKED(match, nsh.flags, 0, 0);
         break;
+    case MFF_NSH_TTL:
+        MATCH_SET_FIELD_MASKED(match, nsh.ttl, 0, 0);
+        break;
     case MFF_NSH_MDTYPE:
         MATCH_SET_FIELD_MASKED(match, nsh.mdtype, 0, 0);
         break;
@@ -2103,10 +2126,12 @@  mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
         MATCH_SET_FIELD_MASKED(match, nsh.np, 0, 0);
         break;
     case MFF_NSH_SPI:
-        MATCH_SET_FIELD_MASKED(match, nsh.spi, htonl(0), htonl(0));
+        match->wc.masks.nsh.path_hdr &= ~htonl(NSH_SPI_MASK);
+        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr, htonl(0));
         break;
     case MFF_NSH_SI:
-        MATCH_SET_FIELD_MASKED(match, nsh.si, 0, 0);
+        match->wc.masks.nsh.path_hdr &= ~htonl(NSH_SI_MASK);
+        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr, 0);
         break;
     case MFF_NSH_C1:
     case MFF_NSH_C2:
@@ -2356,6 +2381,9 @@  mf_set(const struct mf_field *mf,
     case MFF_NSH_FLAGS:
         MATCH_SET_FIELD_MASKED(match, nsh.flags, value->u8, mask->u8);
         break;
+    case MFF_NSH_TTL:
+        MATCH_SET_FIELD_MASKED(match, nsh.ttl, value->u8, mask->u8);
+        break;
     case MFF_NSH_MDTYPE:
         MATCH_SET_FIELD_MASKED(match, nsh.mdtype, value->u8, mask->u8);
         break;
@@ -2363,10 +2391,14 @@  mf_set(const struct mf_field *mf,
         MATCH_SET_FIELD_MASKED(match, nsh.np, value->u8, mask->u8);
         break;
     case MFF_NSH_SPI:
-        MATCH_SET_FIELD_MASKED(match, nsh.spi, value->be32, mask->be32);
+        match->wc.masks.nsh.path_hdr |= mask->be32;
+        nsh_path_hdr_set_spi(&match->flow.nsh.path_hdr,
+                             value->be32 & mask->be32);
         break;
     case MFF_NSH_SI:
-        MATCH_SET_FIELD_MASKED(match, nsh.si, value->u8, mask->u8);
+        match->wc.masks.nsh.path_hdr |= htonl(mask->u8);
+        nsh_path_hdr_set_si(&match->flow.nsh.path_hdr,
+                             value->u8 & mask->u8);
         break;
     case MFF_NSH_C1:
     case MFF_NSH_C2:
diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml
index 065fb03..a4a0735 100644
--- a/lib/meta-flow.xml
+++ b/lib/meta-flow.xml
@@ -1311,7 +1311,9 @@  tcp,tp_src=0x07c0/0xfff0
 
   <group title="Network Service Header">
     <field id="MFF_NSH_FLAGS"
-        title="flags field (8 bits)"/>
+        title="flags field (2 bits)"/>
+    <field id="MFF_NSH_TTL"
+        title="TTL field (6 bits)"/>
     <field id="MFF_NSH_MDTYPE"
         title="mdtype field (8 bits)"/>
     <field id="MFF_NSH_NP"
@@ -3963,7 +3965,7 @@  r r c c c.
       listen for <code>OFPR_INVALID_TTL</code> ``packet-in'' messages via
       OpenFlow.
     </field>
-    
+
     <field id="MFF_IP_FRAG" title="IPv4/v6 Fragment Bitmask">
       <p>
         Specifies what kinds of IP fragments or non-fragments to match.  The
diff --git a/lib/nx-match.c b/lib/nx-match.c
index b782e8c..dc52566 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -1022,6 +1022,7 @@  nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
     const struct flow *flow = &match->flow;
     const size_t start_len = b->size;
     ovs_be16 dl_type = get_dl_type(flow);
+    ovs_be32 spi_mask;
     int match_len;
     int i;
 
@@ -1157,13 +1158,22 @@  nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
     /* Network Service Header */
     nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags,
             match->wc.masks.nsh.flags);
+    nxm_put_8m(&ctx, MFF_NSH_TTL, oxm, flow->nsh.ttl,
+            match->wc.masks.nsh.ttl);
     nxm_put_8m(&ctx, MFF_NSH_MDTYPE, oxm, flow->nsh.mdtype,
             match->wc.masks.nsh.mdtype);
     nxm_put_8m(&ctx, MFF_NSH_NP, oxm, flow->nsh.np,
             match->wc.masks.nsh.np);
-    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm, flow->nsh.spi,
-                match->wc.masks.nsh.spi);
-    nxm_put_8m(&ctx, MFF_NSH_SI, oxm, flow->nsh.si, match->wc.masks.nsh.si);
+    spi_mask = nsh_path_hdr_to_spi(match->wc.masks.nsh.path_hdr);
+    if (spi_mask == htonl(NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
+        spi_mask = OVS_BE32_MAX;
+    }
+    nxm_put_32m(&ctx, MFF_NSH_SPI, oxm,
+                nsh_path_hdr_to_spi(flow->nsh.path_hdr),
+                spi_mask);
+    nxm_put_8m(&ctx, MFF_NSH_SI, oxm,
+                nsh_path_hdr_to_si(flow->nsh.path_hdr),
+                nsh_path_hdr_to_si(match->wc.masks.nsh.path_hdr));
     for (int i = 0; i < 4; i++) {
         nxm_put_32m(&ctx, MFF_NSH_C1 + i, oxm, flow->nsh.c[i],
                     match->wc.masks.nsh.c[i]);
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5f4d23a..601c8b6 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -277,10 +277,10 @@  odp_set_nsh(struct dp_packet *packet, const struct ovs_key_nsh *key,
             const struct ovs_key_nsh *mask)
 {
     struct nsh_hdr *nsh = dp_packet_l3(packet);
+    ovs_be32 path_hdr;
 
     if (!mask) {
-        nsh->ver_flags_len = htons(key->flags << NSH_FLAGS_SHIFT) |
-                             (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
+        nsh_set_flags_and_ttl(nsh, key->flags, key->ttl);
         put_16aligned_be32(&nsh->path_hdr, key->path_hdr);
         switch (nsh->md_type) {
             case NSH_M_TYPE1:
@@ -294,15 +294,28 @@  odp_set_nsh(struct dp_packet *packet, const struct ovs_key_nsh *key,
                 break;
         }
     } else {
-        uint8_t flags = (ntohs(nsh->ver_flags_len) & NSH_FLAGS_MASK) >>
-                            NSH_FLAGS_SHIFT;
+        uint8_t flags = nsh_get_flags(nsh);
+        uint8_t ttl = nsh_get_ttl(nsh);
+
         flags = key->flags | (flags & ~mask->flags);
-        nsh->ver_flags_len = htons(flags << NSH_FLAGS_SHIFT) |
-                             (nsh->ver_flags_len & ~htons(NSH_FLAGS_MASK));
+        ttl = key->ttl | (ttl & ~mask->ttl);
+        nsh_set_flags_and_ttl (nsh, flags, ttl);
+
+        uint32_t spi = ntohl(nsh_get_spi(nsh));
+        uint8_t si = nsh_get_si(nsh);
+        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask->path_hdr);
+        uint8_t si_mask = nsh_path_hdr_to_si(mask->path_hdr);
 
-        ovs_be32 path_hdr = get_16aligned_be32(&nsh->path_hdr);
-        path_hdr = key->path_hdr | (path_hdr & ~mask->path_hdr);
+        if (spi_mask == 0x00ffffff) {
+            spi_mask = UINT32_MAX;
+        }
+        spi = nsh_path_hdr_to_spi_uint32(key->path_hdr) | (spi & ~spi_mask);
+        si = nsh_path_hdr_to_si(key->path_hdr) | (si & ~si_mask);
+        path_hdr = nsh_get_path_hdr(nsh);
+        nsh_path_hdr_set_spi(&path_hdr, htonl(spi));
+        nsh_path_hdr_set_si(&path_hdr, si);
         put_16aligned_be32(&nsh->path_hdr, path_hdr);
+
         switch (nsh->md_type) {
             case NSH_M_TYPE1:
                 for (int i = 0; i < 4; i++) {
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 4f1499e..ac286ea 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -254,12 +254,13 @@  static void
 format_nsh_key(struct ds *ds, const struct ovs_key_nsh *key)
 {
     ds_put_format(ds, "flags=%d", key->flags);
+    ds_put_format(ds, ",ttl=%d", key->ttl);
     ds_put_format(ds, ",mdtype=%d", key->mdtype);
     ds_put_format(ds, ",np=%d", key->np);
     ds_put_format(ds, ",spi=0x%x",
-                  (ntohl(key->path_hdr) & NSH_SPI_MASK) >> NSH_SPI_SHIFT);
+                  nsh_path_hdr_to_spi_uint32(key->path_hdr));
     ds_put_format(ds, ",si=%d",
-                  (ntohl(key->path_hdr) & NSH_SI_MASK) >> NSH_SI_SHIFT);
+                  nsh_path_hdr_to_si(key->path_hdr));
 
     switch (key->mdtype) {
         case NSH_M_TYPE1:
@@ -319,17 +320,16 @@  format_nsh_key_mask(struct ds *ds, const struct ovs_key_nsh *key,
         format_nsh_key(ds, key);
     } else {
         bool first = true;
-        uint32_t spi = (ntohl(key->path_hdr) & NSH_SPI_MASK) >> NSH_SPI_SHIFT;
-        uint32_t spi_mask = (ntohl(mask->path_hdr) & NSH_SPI_MASK) >>
-                                NSH_SPI_SHIFT;
-        if (spi_mask == 0x00ffffff) {
+        uint32_t spi = nsh_path_hdr_to_spi_uint32(key->path_hdr);
+        uint32_t spi_mask = nsh_path_hdr_to_spi_uint32(mask->path_hdr);
+        if (spi_mask == (NSH_SPI_MASK >> NSH_SPI_SHIFT)) {
             spi_mask = UINT32_MAX;
         }
-        uint8_t si = (ntohl(key->path_hdr) & NSH_SI_MASK) >> NSH_SI_SHIFT;
-        uint8_t si_mask = (ntohl(mask->path_hdr) & NSH_SI_MASK) >>
-                              NSH_SI_SHIFT;
+        uint8_t si = nsh_path_hdr_to_si(key->path_hdr);
+        uint8_t si_mask = nsh_path_hdr_to_si(mask->path_hdr);
 
         format_uint8_masked(ds, &first, "flags", key->flags, mask->flags);
+        format_uint8_masked(ds, &first, "ttl", key->ttl, mask->ttl);
         format_uint8_masked(ds, &first, "mdtype", key->mdtype, mask->mdtype);
         format_uint8_masked(ds, &first, "np", key->np, mask->np);
         format_be32_masked(ds, &first, "spi", htonl(spi), htonl(spi_mask));
@@ -345,12 +345,12 @@  static void
 format_odp_encap_nsh_action(struct ds *ds,
                             const struct ovs_action_encap_nsh *encap_nsh)
  {
-    uint32_t path_hdr = ntohl(encap_nsh->path_hdr);
-    uint32_t spi = (path_hdr & NSH_SPI_MASK) >> NSH_SPI_SHIFT;
-    uint8_t si = (path_hdr & NSH_SI_MASK) >> NSH_SI_SHIFT;
+    uint32_t spi = nsh_path_hdr_to_spi_uint32(encap_nsh->path_hdr);
+    uint8_t si = nsh_path_hdr_to_si(encap_nsh->path_hdr);
 
     ds_put_cstr(ds, "encap_nsh(");
     ds_put_format(ds, "flags=%d", encap_nsh->flags);
+    ds_put_format(ds, ",ttl=%d", encap_nsh->ttl);
     ds_put_format(ds, ",mdtype=%d", encap_nsh->mdtype);
     ds_put_format(ds, ",np=%d", encap_nsh->np);
     ds_put_format(ds, ",spi=0x%x", spi);
@@ -1785,7 +1785,8 @@  parse_odp_encap_nsh_action(const char *s, struct ofpbuf *actions)
 {
     int n = 0;
     int ret = 0;
-    struct ovs_action_encap_nsh encap_nsh;
+    struct ovs_action_encap_nsh *encap_nsh
+        = xmalloc(sizeof *encap_nsh + NSH_CTX_HDRS_MAX_LEN);
     uint32_t spi;
     uint8_t si;
     uint32_t cd;
@@ -1796,11 +1797,12 @@  parse_odp_encap_nsh_action(const char *s, struct ofpbuf *actions)
     }
 
     /* The default is NSH_M_TYPE1 */
-    encap_nsh.flags = 0;
-    encap_nsh.mdtype = NSH_M_TYPE1;
-    encap_nsh.mdlen = NSH_M_TYPE1_MDLEN;
-    encap_nsh.path_hdr = htonl(255);
-    memset(encap_nsh.metadata, 0, NSH_M_TYPE1_MDLEN);
+    encap_nsh->flags = 0;
+    encap_nsh->ttl = 63;
+    encap_nsh->mdtype = NSH_M_TYPE1;
+    encap_nsh->path_hdr = htonl(255);
+    encap_nsh->mdlen = NSH_M_TYPE1_MDLEN;
+    memset(encap_nsh->metadata, 0, NSH_M_TYPE1_MDLEN);
 
     for (;;) {
         n += strspn(s + n, delimiters);
@@ -1808,17 +1810,20 @@  parse_odp_encap_nsh_action(const char *s, struct ofpbuf *actions)
             break;
         }
 
-        if (ovs_scan_len(s, &n, "flags=%"SCNi8, &encap_nsh.flags)) {
+        if (ovs_scan_len(s, &n, "flags=%"SCNi8, &encap_nsh->flags)) {
             continue;
         }
-        if (ovs_scan_len(s, &n, "mdtype=%"SCNi8, &encap_nsh.mdtype)) {
-            switch (encap_nsh.mdtype) {
+        if (ovs_scan_len(s, &n, "ttl=%"SCNi8, &encap_nsh->ttl)) {
+            continue;
+        }
+        if (ovs_scan_len(s, &n, "mdtype=%"SCNi8, &encap_nsh->mdtype)) {
+            switch (encap_nsh->mdtype) {
             case NSH_M_TYPE1:
                 /* This is the default format. */;
                 break;
             case NSH_M_TYPE2:
                 /* Length will be updated later. */
-                encap_nsh.mdlen = 0;
+                encap_nsh->mdlen = 0;
                 break;
             default:
                 ret = -EINVAL;
@@ -1826,24 +1831,24 @@  parse_odp_encap_nsh_action(const char *s, struct ofpbuf *actions)
             }
             continue;
         }
-        if (ovs_scan_len(s, &n, "np=%"SCNi8, &encap_nsh.np)) {
+        if (ovs_scan_len(s, &n, "np=%"SCNi8, &encap_nsh->np)) {
             continue;
         }
         if (ovs_scan_len(s, &n, "spi=0x%"SCNx32, &spi)) {
-            encap_nsh.path_hdr =
+            encap_nsh->path_hdr =
                     htonl(((spi << NSH_SPI_SHIFT) & NSH_SPI_MASK) |
-                            (ntohl(encap_nsh.path_hdr) & ~NSH_SPI_MASK));
+                            (ntohl(encap_nsh->path_hdr) & ~NSH_SPI_MASK));
             continue;
         }
         if (ovs_scan_len(s, &n, "si=%"SCNi8, &si)) {
-            encap_nsh.path_hdr =
+            encap_nsh->path_hdr =
                     htonl((si << NSH_SI_SHIFT) |
-                            (ntohl(encap_nsh.path_hdr) & ~NSH_SI_MASK));
+                            (ntohl(encap_nsh->path_hdr) & ~NSH_SI_MASK));
             continue;
         }
-        if (encap_nsh.mdtype == NSH_M_TYPE1) {
+        if (encap_nsh->mdtype == NSH_M_TYPE1) {
             struct nsh_md1_ctx *md1 =
-                ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh.metadata);
+                ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh->metadata);
             if (ovs_scan_len(s, &n, "c1=0x%"SCNx32, &cd)) {
                 put_16aligned_be32(&md1->c[0], htonl(cd));
                 continue;
@@ -1861,28 +1866,29 @@  parse_odp_encap_nsh_action(const char *s, struct ofpbuf *actions)
                 continue;
             }
         }
-        else if (encap_nsh.mdtype == NSH_M_TYPE2) {
+        else if (encap_nsh->mdtype == NSH_M_TYPE2) {
             struct ofpbuf b;
             char buf[512];
             size_t mdlen;
             if (ovs_scan_len(s, &n, "md2=0x%511[0-9a-fA-F]", buf)) {
-                ofpbuf_use_stub(&b, encap_nsh.metadata,
-                                OVS_ENCAP_NSH_MAX_MD_LEN);
+                ofpbuf_use_stub(&b, encap_nsh->metadata,
+                                NSH_CTX_HDRS_MAX_LEN);
                 ofpbuf_put_hex(&b, buf, &mdlen);
-                encap_nsh.mdlen = mdlen;
+                encap_nsh->mdlen = mdlen;
                 ofpbuf_uninit(&b);
             }
             continue;
         }
     }
 out:
+    free(encap_nsh);
     if (ret < 0) {
         return ret;
     } else {
         size_t size = offsetof(struct ovs_action_encap_nsh, metadata)
-                + ROUND_UP(encap_nsh.mdlen, 4);
+                + ROUND_UP(encap_nsh->mdlen, 4);
         nl_msg_put_unspec(actions, OVS_ACTION_ATTR_ENCAP_NSH,
-                          &encap_nsh, size);
+                          encap_nsh, size);
         return n;
     }
 }
@@ -6613,26 +6619,10 @@  commit_set_nw_action(const struct flow *flow, struct flow *base,
 static void
 get_nsh_key(const struct flow *flow, struct ovs_key_nsh *nsh, bool is_mask)
 {
-    nsh->flags = flow->nsh.flags;
-    nsh->mdtype = flow->nsh.mdtype;
-    nsh->np = flow->nsh.np;
-    nsh->path_hdr = htonl((ntohl(flow->nsh.spi) << NSH_SPI_SHIFT) |
-                          flow->nsh.si);
-    if (is_mask) {
-        for (int i = 0; i < 4; i++) {
-            nsh->c[i] = flow->nsh.c[i];
-        }
-    } else {
-        switch (nsh->mdtype) {
-        case NSH_M_TYPE1:
-            for (int i = 0; i < 4; i++) {
-                nsh->c[i] = flow->nsh.c[i];
-            }
-            break;
-        case NSH_M_TYPE2:
-        default:
-            /* No match support for other MD formats yet. */
-            break;
+    memcpy(nsh, &flow->nsh, sizeof(*nsh));
+    if (!is_mask) {
+        if (nsh->mdtype != NSH_M_TYPE1) {
+            memset(nsh, 0, sizeof(nsh->c));
         }
     }
 }
@@ -6641,23 +6631,9 @@  static void
 put_nsh_key(const struct ovs_key_nsh *nsh, struct flow *flow,
             bool is_mask OVS_UNUSED)
 {
-    flow->nsh.flags = nsh->flags;
-    flow->nsh.mdtype = nsh->mdtype;
-    flow->nsh.np = nsh->np;
-    flow->nsh.spi = htonl((ntohl(nsh->path_hdr) & NSH_SPI_MASK) >>
-                              NSH_SPI_SHIFT);
-    flow->nsh.si = (ntohl(nsh->path_hdr) & NSH_SI_MASK) >> NSH_SI_SHIFT;
-    switch (nsh->mdtype) {
-        case NSH_M_TYPE1:
-            for (int i = 0; i < 4; i++) {
-                flow->nsh.c[i] = nsh->c[i];
-            }
-            break;
-        case NSH_M_TYPE2:
-        default:
-            /* No match support for other MD formats yet. */
-            memset(flow->nsh.c, 0, sizeof flow->nsh.c);
-            break;
+    memcpy(&flow->nsh, nsh, sizeof(*nsh));
+    if (flow->nsh.mdtype != NSH_M_TYPE1) {
+        memset(flow->nsh.c, 0, sizeof(flow->nsh.c));
     }
 }
 
@@ -6798,19 +6774,20 @@  odp_put_encap_nsh_action(struct ofpbuf *odp_actions,
                          const struct flow *flow,
                          struct ofpbuf *encap_data)
 {
-    struct ovs_action_encap_nsh encap_nsh;
+    struct ovs_action_encap_nsh *encap_nsh
+        = xmalloc(sizeof *encap_nsh + NSH_CTX_HDRS_MAX_LEN);
 
-    encap_nsh.flags = flow->nsh.flags;
-    encap_nsh.mdtype = flow->nsh.mdtype;
-    encap_nsh.np = flow->nsh.np;
-    encap_nsh.path_hdr = htonl((ntohl(flow->nsh.spi) << NSH_SPI_SHIFT) |
-                                   flow->nsh.si);
+    encap_nsh->flags = flow->nsh.flags;
+    encap_nsh->ttl = flow->nsh.ttl;
+    encap_nsh->mdtype = flow->nsh.mdtype;
+    encap_nsh->np = flow->nsh.np;
+    encap_nsh->path_hdr = flow->nsh.path_hdr;
 
-    switch (encap_nsh.mdtype) {
+    switch (encap_nsh->mdtype) {
     case NSH_M_TYPE1: {
         struct nsh_md1_ctx *md1 =
-            ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh.metadata);
-        encap_nsh.mdlen = NSH_M_TYPE1_MDLEN;
+            ALIGNED_CAST(struct nsh_md1_ctx *, encap_nsh->metadata);
+        encap_nsh->mdlen = NSH_M_TYPE1_MDLEN;
         for (int i = 0; i < 4; i++) {
             put_16aligned_be32(&md1->c[i], flow->nsh.c[i]);
         }
@@ -6818,19 +6795,20 @@  odp_put_encap_nsh_action(struct ofpbuf *odp_actions,
     }
     case NSH_M_TYPE2:
         if (encap_data) {
-            ovs_assert(encap_data->size < OVS_ENCAP_NSH_MAX_MD_LEN);
-            encap_nsh.mdlen = encap_data->size;
-            memcpy(encap_nsh.metadata, encap_data->data, encap_data->size);
+            ovs_assert(encap_data->size <= NSH_CTX_HDRS_MAX_LEN);
+            encap_nsh->mdlen = encap_data->size;
+            memcpy(encap_nsh->metadata, encap_data->data, encap_data->size);
         } else {
-            encap_nsh.mdlen = 0;
+            encap_nsh->mdlen = 0;
         }
         break;
     default:
-        encap_nsh.mdlen = 0;
+        encap_nsh->mdlen = 0;
         break;
     }
     nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_ENCAP_NSH,
-                      &encap_nsh, sizeof(encap_nsh));
+                      encap_nsh, sizeof(*encap_nsh) + encap_nsh->mdlen);
+    free(encap_nsh);
 }
 
 static void
diff --git a/lib/packets.c b/lib/packets.c
index 74d87ed..80dadf9 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -427,10 +427,11 @@  encap_nsh(struct dp_packet *packet, const struct ovs_action_encap_nsh *encap)
     }
 
     nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length);
-    nsh->ver_flags_len = htons(encap->flags << NSH_FLAGS_SHIFT | length >> 2);
+    nsh->ver_flags_ttl_len = 0;
+    nsh_set_flags_ttl_len(nsh, encap->flags, encap->ttl, length);
+    nsh->md_type = encap->mdtype & NSH_MDTYPE_MASK;
     nsh->next_proto = next_proto;
     put_16aligned_be32(&nsh->path_hdr, encap->path_hdr);
-    nsh->md_type = encap->mdtype;
     switch (nsh->md_type) {
         case NSH_M_TYPE1:
             nsh->md1 = *ALIGNED_CAST(struct nsh_md1_ctx *, encap->metadata);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 9e1f837..1b99ad5 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -5807,7 +5807,7 @@  rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
 {
     ovs_be32 packet_type = flow->packet_type;
     const char *ptr = (char *) encap->props;
-    struct ofpbuf *buf = ofpbuf_new(OVS_ENCAP_NSH_MAX_MD_LEN);
+    struct ofpbuf *buf = ofpbuf_new(NSH_CTX_HDRS_MAX_LEN);
     uint8_t md_type = NSH_M_TYPE1;
     uint8_t np = 0;
     int i;
@@ -5847,7 +5847,7 @@  rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
         }
         ptr += ROUND_UP(prop_ptr->len, 8);
     }
-    if (buf->size == 0 || buf->size > OVS_ENCAP_NSH_MAX_MD_LEN) {
+    if (buf->size == 0 || buf->size > NSH_CTX_HDRS_MAX_LEN) {
         ofpbuf_delete(buf);
         buf = NULL;
     }
@@ -5885,10 +5885,10 @@  rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
     /* Populate the flow with the new NSH header. */
     flow->packet_type = htonl(PT_NSH);
     flow->dl_type = htons(ETH_TYPE_NSH);
-    flow->nsh.flags = 0;    /* */
+    flow->nsh.flags = 0;
+    flow->nsh.ttl = 63;
     flow->nsh.np = np;
-    flow->nsh.spi = 0;
-    flow->nsh.si = 255;
+    flow->nsh.path_hdr = htonl(255);
 
     if (md_type == NSH_M_TYPE1) {
         flow->nsh.mdtype = NSH_M_TYPE1;
@@ -5901,6 +5901,7 @@  rewrite_flow_encap_nsh(struct xlate_ctx *ctx,
     } else if (md_type == NSH_M_TYPE2) {
         flow->nsh.mdtype = NSH_M_TYPE2;
     }
+    flow->nsh.mdtype &= NSH_MDTYPE_MASK;
 
     return buf;
 }
diff --git a/tests/nsh.at b/tests/nsh.at
index aa80a2a..93d8b42 100644
--- a/tests/nsh.at
+++ b/tests/nsh.at
@@ -13,7 +13,7 @@  OVS_VSWITCHD_START([dnl
     add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2])
 
 AT_DATA([flows.txt], [dnl
-    table=0,in_port=1,dl_type=0x894f,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,actions=set_field:0x80->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,2
+    table=0,in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,actions=set_field:0x2->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,2
 ])
 
 AT_CHECK([
@@ -21,25 +21,25 @@  AT_CHECK([
     ovs-ofctl -Oopenflow13 add-flows br0 flows.txt
     ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions
 ], [0], [dnl
- in_port=1,dl_type=0x894f,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344 actions=set_field:128->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,output:2
+ in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344 actions=set_field:2->nsh_flags,set_field:254->nsh_si,set_field:0x44332211->nsh_c1,output:2
 ])
 
 AT_CHECK([
-    ovs-appctl ofproto/trace br0 'in_port=1,dl_type=0x894f,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00'
+    ovs-appctl ofproto/trace br0 'in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00'
 ], [0], [dnl
-Flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
+Flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
 
 bridge("br0")
 -------------
- 0. in_port=1,dl_type=0x894f,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344, priority 32768
-    set_field:128->nsh_flags
+ 0. in_port=1,dl_type=0x894f,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344, priority 32768
+    set_field:2->nsh_flags
     set_field:254->nsh_si
     set_field:0x44332211->nsh_c1
     output:2
 
-Final flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=128,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=254,nsh_c1=0x44332211,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
-Megaflow: recirc_id=0,eth,in_port=1,dl_type=0x894f,nsh_flags=0,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344
-Datapath actions: set(nsh(flags=128,spi=0x123456,si=254,c1=0x44332211)),2
+Final flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=2,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=254,nsh_c1=0x44332211,nsh_c2=0x55667788,nsh_c3=0x99aabbcc,nsh_c4=0xddeeff00,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
+Megaflow: recirc_id=0,eth,in_port=1,dl_type=0x894f,nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x123456,nsh_si=255,nsh_c1=0x11223344
+Datapath actions: set(nsh(flags=2,ttl=63,spi=0x123456,si=254,c1=0x44332211)),2
 ])
 
 OVS_VSWITCHD_STOP
@@ -103,15 +103,15 @@  bridge("br0")
     decap()
     decap()
 
-Final flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=11:22:33:44:55:66,dl_type=0x894f,nsh_flags=0,nsh_mdtype=1,nsh_np=3,nsh_spi=0x1234,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x0,nsh_c3=0x0,nsh_c4=0x0,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
+Final flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=11:22:33:44:55:66,dl_type=0x894f,nsh_flags=0,nsh_ttl=63,nsh_mdtype=1,nsh_np=3,nsh_spi=0x1234,nsh_si=255,nsh_c1=0x11223344,nsh_c2=0x0,nsh_c3=0x0,nsh_c4=0x0,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
 Megaflow: recirc_id=0,eth,ip,in_port=1,dl_dst=66:77:88:99:aa:bb,nw_frag=no
-Datapath actions: encap_nsh(flags=0,mdtype=1,np=3,spi=0x1234,si=255,c1=0x11223344,c2=0x0,c3=0x0,c4=0x0),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x1)
+Datapath actions: encap_nsh(flags=0,ttl=63,mdtype=1,np=3,spi=0x1234,si=255,c1=0x11223344,c2=0x0,c3=0x0,c4=0x0),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x1)
 ])
 
 AT_CHECK([
     ovs-appctl ofproto/trace br0 'in_port=4,dl_type=0x894f,nsh_mdtype=1,nsh_np=3,nsh_spi=0x1234,nsh_c1=0x11223344'
 ], [0], [dnl
-Flow: in_port=4,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_mdtype=1,nsh_np=3,nsh_spi=0x1234,nsh_si=0,nsh_c1=0x11223344,nsh_c2=0x0,nsh_c3=0x0,nsh_c4=0x0,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
+Flow: in_port=4,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_ttl=0,nsh_mdtype=1,nsh_np=3,nsh_spi=0x1234,nsh_si=0,nsh_c1=0x11223344,nsh_c2=0x0,nsh_c3=0x0,nsh_c4=0x0,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
 
 bridge("br0")
 -------------
@@ -139,7 +139,7 @@  ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows dummy@ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:encap_nsh(flags=0,mdtype=1,np=3,spi=0x1234,si=255,c1=0x11223344,c2=0x0,c3=0x0,c4=0x0),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x3)
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:encap_nsh(flags=0,ttl=63,mdtype=1,np=3,spi=0x1234,si=255,c1=0x11223344,c2=0x0,c3=0x0,c4=0x0),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x3)
 recirc_id(0x3),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:2
 ])
 
@@ -170,7 +170,7 @@  ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows dummy@ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:push_vlan(vid=100,pcp=0),encap_nsh(flags=0,mdtype=1,np=3,spi=0x0,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),decap_nsh(),recirc(0x4)
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:push_vlan(vid=100,pcp=0),encap_nsh(flags=0,ttl=63,mdtype=1,np=3,spi=0x0,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),decap_nsh(),recirc(0x4)
 recirc_id(0x4),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=100,pcp=0),encap(eth_type(0x0800),ipv4(frag=no)), packets:1, bytes:102, used:0.0s, actions:2
 ])
 
@@ -195,7 +195,7 @@  ovs-vsctl set bridge br0 datapath_type=dummy \
         add-port br0 v4 -- set Interface v4 type=patch options:peer=v3 ofport_request=4])
 
 AT_DATA([flows.txt], [dnl
-    table=0,in_port=1,ip,actions=encap(nsh(md_type=2,tlv(0x1000,10,0x12345678))),set_field:0x1234->nsh_spi,encap(ethernet),set_field:11:22:33:44:55:66->dl_dst,3
+    table=0,in_port=1,ip,actions=encap(nsh(md_type=2,tlv(0x1000,10,0x12345678),tlv(0x2000,20,0xfedcba9876543210))),set_field:0x1234->nsh_spi,encap(ethernet),set_field:11:22:33:44:55:66->dl_dst,3
     table=0,in_port=4,dl_type=0x894f,nsh_mdtype=2,nsh_spi=0x1234,actions=decap(),decap(),2
 ])
 
@@ -205,7 +205,7 @@  AT_CHECK([
     ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions
 ], [0], [dnl
  in_port=4,dl_type=0x894f,nsh_mdtype=2,nsh_spi=0x1234 actions=decap(),decap(),output:2
- ip,in_port=1 actions=encap(nsh(md_type=2,tlv(0x1000,10,0x12345678))),set_field:0x1234->nsh_spi,encap(ethernet),set_field:11:22:33:44:55:66->eth_dst,output:3
+ ip,in_port=1 actions=encap(nsh(md_type=2,tlv(0x1000,10,0x12345678),tlv(0x2000,20,0xfedcba9876543210))),set_field:0x1234->nsh_spi,encap(ethernet),set_field:11:22:33:44:55:66->eth_dst,output:3
 ])
 
 AT_CHECK([
@@ -216,7 +216,7 @@  Flow: icmp,in_port=1,vlan_tci=0x0000,dl_src=00:11:22:33:44:55,dl_dst=66:77:88:99
 bridge("br0")
 -------------
  0. ip,in_port=1, priority 32768
-    encap(nsh(md_type=2,tlv(0x1000,10,0x12345678)))
+    encap(nsh(md_type=2,tlv(0x1000,10,0x12345678),tlv(0x2000,20,0xfedcba9876543210)))
     set_field:0x1234->nsh_spi
     encap(ethernet)
     set_field:11:22:33:44:55:66->eth_dst
@@ -228,15 +228,15 @@  bridge("br0")
     decap()
     decap()
 
-Final flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=11:22:33:44:55:66,dl_type=0x894f,nsh_flags=0,nsh_mdtype=2,nsh_np=3,nsh_spi=0x1234,nsh_si=255,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
+Final flow: in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=11:22:33:44:55:66,dl_type=0x894f,nsh_flags=0,nsh_ttl=63,nsh_mdtype=2,nsh_np=3,nsh_spi=0x1234,nsh_si=255,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
 Megaflow: recirc_id=0,eth,ip,in_port=1,dl_dst=66:77:88:99:aa:bb,nw_frag=no
-Datapath actions: encap_nsh(flags=0,mdtype=2,np=3,spi=0x1234,si=255,md2=0x10000a0412345678),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x1)
+Datapath actions: encap_nsh(flags=0,ttl=63,mdtype=2,np=3,spi=0x1234,si=255,md2=0x10000a041234567820001408fedcba9876543210),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x1)
 ])
 
 AT_CHECK([
     ovs-appctl ofproto/trace br0 'in_port=4,dl_type=0x894f,nsh_mdtype=2,nsh_np=3,nsh_spi=0x1234'
 ], [0], [dnl
-Flow: in_port=4,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_mdtype=2,nsh_np=3,nsh_spi=0x1234,nsh_si=0,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
+Flow: in_port=4,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x894f,nsh_flags=0,nsh_ttl=0,nsh_mdtype=2,nsh_np=3,nsh_spi=0x1234,nsh_si=0,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=0
 
 bridge("br0")
 -------------
@@ -264,7 +264,7 @@  ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows dummy@ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:encap_nsh(flags=0,mdtype=2,np=3,spi=0x1234,si=255,md2=0x10000a0412345678),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x3)
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=1e:2c:e9:2a:66:9e),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:encap_nsh(flags=0,ttl=63,mdtype=2,np=3,spi=0x1234,si=255,md2=0x10000a041234567820001408fedcba9876543210),push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),pop_eth,decap_nsh(),set(eth(dst=11:22:33:44:55:66)),recirc(0x3)
 recirc_id(0x3),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:1, bytes:98, used:0.0s, actions:2
 ])
 
@@ -577,7 +577,7 @@  ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows dummy@ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.10.30,frag=no), packets:1, bytes:98, used:0.0s, actions:pop_eth,encap_nsh(flags=0,mdtype=1,np=1,spi=0x3000,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=aa:55:00:00:00:03,src=aa:55:00:00:00:01,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0xc000004,vni=0x0)),out_port(1)),set(ipv4(src=30.0.0.1,dst=30.0.0.3)),tnl_pop(4789))
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.10.30,frag=no), packets:1, bytes:98, used:0.0s, actions:pop_eth,encap_nsh(flags=0,ttl=63,mdtype=1,np=1,spi=0x3000,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=aa:55:00:00:00:03,src=aa:55:00:00:00:01,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.3,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0xc000004,vni=0x0)),out_port(1)),set(ipv4(src=30.0.0.1,dst=30.0.0.3)),tnl_pop(4789))
 tunnel(tun_id=0x0,src=30.0.0.1,dst=30.0.0.3,flags(-df-csum+key)),recirc_id(0),in_port(4789),packet_type(ns=1,id=0x894f),nsh(np=1,spi=0x3000,si=255), packets:1, bytes:108, used:0.0s, actions:decap_nsh(),recirc(0x1)
 tunnel(tun_id=0x0,src=30.0.0.1,dst=30.0.0.3,flags(-df-csum+key)),recirc_id(0x1),in_port(4789),packet_type(ns=1,id=0x800),ipv4(frag=no), packets:1, bytes:84, used:0.0s, actions:push_eth(src=00:00:00:00:00:00,dst=aa:55:aa:55:00:03),6
 ])
@@ -631,7 +631,7 @@  ovs-appctl time/warp 1000
 AT_CHECK([
     ovs-appctl dpctl/dump-flows dummy@ovs-dummy | strip_used | grep -v ipv6 | sort
 ], [0], [flow-dump from non-dpdk interfaces:
-recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.10.20/255.255.255.248,frag=no), packets:1, bytes:98, used:0.0s, actions:pop_eth,encap_nsh(flags=0,mdtype=1,np=1,spi=0x3020,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=aa:55:00:00:00:02,src=aa:55:00:00:00:01,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0xc000004,vni=0x0)),out_port(1)),set(ipv4(src=20.0.0.1,dst=20.0.0.2)),tnl_pop(4789))
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.10.20/255.255.255.248,frag=no), packets:1, bytes:98, used:0.0s, actions:pop_eth,encap_nsh(flags=0,ttl=63,mdtype=1,np=1,spi=0x3020,si=255,c1=0x0,c2=0x0,c3=0x0,c4=0x0),clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=aa:55:00:00:00:02,src=aa:55:00:00:00:01,dl_type=0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0xc000004,vni=0x0)),out_port(1)),set(ipv4(src=20.0.0.1,dst=20.0.0.2)),tnl_pop(4789))
 tunnel(tun_id=0x0,src=20.0.0.1,dst=20.0.0.2,flags(-df-csum+key)),recirc_id(0),in_port(4789),packet_type(ns=1,id=0x894f),nsh(spi=0x3020,si=255), packets:1, bytes:108, used:0.0s, actions:push_eth(src=00:00:00:00:00:00,dst=11:22:33:44:55:66),set(nsh(spi=0x3020,si=254)),pop_eth,clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=aa:55:00:00:00:03,src=aa:55:00:00:00:02,dl_type=0x0800),ipv4(src=20.0.0.2,dst=20.0.0.3,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0xc000004,vni=0x0)),out_port(2)),set(ipv4(src=30.0.0.2,dst=30.0.0.3)),tnl_pop(4789))
 tunnel(tun_id=0x0,src=30.0.0.2,dst=30.0.0.3,flags(-df-csum+key)),recirc_id(0),in_port(4789),packet_type(ns=1,id=0x894f),nsh(np=1,spi=0x3020,si=254), packets:1, bytes:108, used:0.0s, actions:decap_nsh(),recirc(0x2)
 tunnel(tun_id=0x0,src=30.0.0.2,dst=30.0.0.3,flags(-df-csum+key)),recirc_id(0x2),in_port(4789),packet_type(ns=1,id=0x800),ipv4(frag=no), packets:1, bytes:84, used:0.0s, actions:push_eth(src=00:00:00:00:00:00,dst=aa:55:aa:55:00:03),6