diff mbox

[ovs-dev] ofproto-dpif: Send QinQ related sFlow counters

Message ID 1492513702-213729-1-git-send-email-lukaszx.rzasik@intel.com
State Changes Requested
Headers show

Commit Message

Lukasz Rzasik April 18, 2017, 11:08 a.m. UTC
This patch implements QinQ related sFlow counters.
It is implemented according to sFlow Version 5,
http://www.sflow.org/sflow_version_5.txt
Open vSwitch will send a stack of stripped VLAN tags
to sFlow collector if the original packets have multiple
VLAN tags.

Unit tests have been updated accordingly.

The patch is based on commit f0fb825a37 (Add support
for 802.1ad (QinQ tunneling))

Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
CC: Thomas F Herbert <thomasfherbert@gmail.com>
CC: Xiao Liang <shaw.leon@gmail.com>
CC: Eric Garver <e@erig.me>
CC: Neil McKee <neil.mckee@inmon.com>
---
 lib/packets.h                |  4 +++
 ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---
 ofproto/ofproto-dpif-sflow.h |  9 +++++
 tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
 tests/test-sflow.c           | 25 +++++++++++++
 5 files changed, 177 insertions(+), 4 deletions(-)

Comments

Eric Garver April 18, 2017, 3:24 p.m. UTC | #1
Hi Lukasz,

Thanks for expanding the 802.1ad support.
I'm not familiar with sFlow, so I'll provide what feedback I can.

On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
> This patch implements QinQ related sFlow counters.
> It is implemented according to sFlow Version 5,
> http://www.sflow.org/sflow_version_5.txt
> Open vSwitch will send a stack of stripped VLAN tags
> to sFlow collector if the original packets have multiple
> VLAN tags.
> 
> Unit tests have been updated accordingly.
> 
> The patch is based on commit f0fb825a37 (Add support
> for 802.1ad (QinQ tunneling))
> 
> Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
> CC: Thomas F Herbert <thomasfherbert@gmail.com>
> CC: Xiao Liang <shaw.leon@gmail.com>
> CC: Eric Garver <e@erig.me>
> CC: Neil McKee <neil.mckee@inmon.com>
> ---
>  lib/packets.h                |  4 +++
>  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---
>  ofproto/ofproto-dpif-sflow.h |  9 +++++
>  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
>  tests/test-sflow.c           | 25 +++++++++++++
>  5 files changed, 177 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/packets.h b/lib/packets.h
> index 755f08d..7555dfc 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
>          eth_type == htons(ETH_TYPE_VLAN_8021AD);
>  }
>  
> +static inline bool eth_type_8021Q(ovs_be16 eth_type)
> +{
> +    return eth_type == htons(ETH_TYPE_VLAN_8021Q);
> +}
>  
>  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
>   * lengths. */
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index 59fafa1..41972e2 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
>      }
>  }
>  
> +static void
> +dpif_sflow_pop_vlan(const struct flow *flow,
> +                    struct dpif_sflow_actions *sflow_actions)
> +{
> +    union flow_vlan_hdr vlan = flow->vlans[0];
> +    if (eth_type_vlan(vlan.tpid)) {
> +        int depth = 0;
> +        int ii;
> +        /* Calculate depth by detecting 8021Q TPID. */
> +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
> +            vlan = flow->vlans[ii];
> +            depth++;
> +            if (eth_type_8021Q(vlan.tpid)) {
> +                break;
> +            }

This seems to be using 0x8100 as a marker to stop. Does this mean double
stacked 0x8100 will not work?

flow_count_vlan_headers() can be used to find the number of tags.

> +        }
> +
> +        if (depth > 1) {
> +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
> +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
> +            sflow_actions->vlan_stack_depth++;
> +        }

AFAICS, this only adds the outermost tag to the stack. Is that
intentional? It's unclear to me if sFlow expects one or two VLANs to be
in the stack.

> +    }
> +}
> +
>  void
>  dpif_sflow_read_actions(const struct flow *flow,
>  			const struct nlattr *actions, size_t actions_len,
> @@ -1170,11 +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
>  	    break;
>  
>  	case OVS_ACTION_ATTR_PUSH_VLAN:
> +            break;
> +
>  	case OVS_ACTION_ATTR_POP_VLAN:
> -	    /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
> -	     * construct a VLAN-stack. The sFlow user-action cookie already
> -	     * captures the egress VLAN ID so there is nothing more to do here.
> -	     */
> +            dpif_sflow_pop_vlan(flow, sflow_actions);
>  	    break;
>  
>  	case OVS_ACTION_ATTR_PUSH_MPLS: {
> @@ -1225,6 +1249,19 @@ dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
>      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
>  }
>  
> +static void
> +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
> +                             uint32_t *vlans_buf,
> +                             const struct dpif_sflow_actions *sflow_actions)
> +{
> +    int ii;
> +    stack->depth = sflow_actions->vlan_stack_depth;
> +    stack->stack = vlans_buf;
> +    for (ii = 0; ii < stack->depth; ii++) {
> +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
> +    }
> +}
> +
>  /* Extract the output port count from the user action cookie.
>   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
>   */
> @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>      SFLFlow_sample_element vniInElem, vniOutElem;
>      SFLFlow_sample_element mplsElem;
>      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
> +    SFLFlow_sample_element vlanTunnelElem;
> +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
>      SFLSampler *sampler;
>      struct dpif_sflow_port *in_dsp;
>      struct dpif_sflow_port *out_dsp;
> @@ -1372,6 +1411,19 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>  	SFLADD_ELEMENT(&fs, &mplsElem);
>      }
>  
> +    /* stripped VLAN tags stack. */
> +    if (sflow_actions
> +        && sflow_actions->vlan_stack_depth > 0
> +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
> +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
> +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
> +        dpif_sflow_encode_vlan_stack(
> +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
> +                                 vlans_buf,
> +                                 sflow_actions);
> +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
> +    }
> +
>      /* Submit the flow sample to be encoded into the next datagram. */
>      SFLADD_ELEMENT(&fs, &hdrElem);
>      SFLADD_ELEMENT(&fs, &switchElem);
> diff --git a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h
> index 014e6cc..cfd5492 100644
> --- a/ofproto/ofproto-dpif-sflow.h
> +++ b/ofproto/ofproto-dpif-sflow.h
> @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
>      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
>      uint32_t mpls_stack_depth;               /* Out stack depth. */
>      bool mpls_err;                           /* MPLS actions parse failure. */
> +
> +    /* Using host-byte order for the vlan stack here
> +       to match the expectations of the sFlow library.
> +       List of stripped 802.1Q TPID/TCI layers. Each
> +       TPID,TCI pair is represented as a single 32 bit
> +       integer. Layers listed from outermost to innermost.
> +    */
> +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
> +    uint32_t vlan_stack_depth;                 /* Stack depth. */
>  };
>  
>  struct dpif_sflow *dpif_sflow_create(void);
> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
> index 0c2ea38..f096175 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -6333,6 +6333,89 @@ HEADER
>  OVS_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN tunnel])
> +AT_XFAIL_IF([test "$IS_WIN32" = "yes"])
> +OVS_VSWITCHD_START
> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
> +add_of_ports br0 1 2
> +AT_DATA([flows.txt], [dnl
> +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
> +])
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +dnl set up sFlow logging
> +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
> +AT_CAPTURE_FILE([sflow.log])
> +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT])
> +ovs-appctl time/stop
> +
> +dnl configure sflow
> +ovs-vsctl \
> +   set Bridge br0 sflow=@sf -- \
> +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
> +     header=128 sampling=1 polling=0
> +
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
> +
> +dnl sleep long enough to get the sFlow datagram flushed out (may be delayed for up to 1 second)
> +for i in `seq 1 30`; do
> +    ovs-appctl time/warp 100
> +done
> +
> +OVS_APP_EXIT_AND_WAIT([test-sflow])
> +
> +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
> +        /g']], [0], [dnl
> +HEADER
> +        dgramSeqNo=1
> +        ds=127.0.0.1>2:1000
> +        fsSeqNo=1
> +        ext_vlan_tpid_0=0x88a8
> +        ext_vlan_vid_0=150
> +        ext_vlan_pcp_0=0
> +        ext_vlan_cfi_0=1
> +        in_vlan=150
> +        in_priority=0
> +        out_vlan=0
> +        out_priority=0
> +        meanSkip=1
> +        samplePool=1
> +        dropEvents=0
> +        in_ifindex=0
> +        in_format=0
> +        out_ifindex=1
> +        out_format=2
> +        hdr_prot=1
> +        pkt_len=22
> +        stripped=4
> +        hdr_len=18
> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
> +HEADER
> +        dgramSeqNo=1
> +        ds=127.0.0.1>2:1000
> +        fsSeqNo=2
> +        in_vlan=2000
> +        in_priority=0
> +        out_vlan=0
> +        out_priority=0
> +        meanSkip=1
> +        samplePool=2
> +        dropEvents=0
> +        in_ifindex=0
> +        in_format=0
> +        out_ifindex=1
> +        out_format=2
> +        hdr_prot=1
> +        pkt_len=42
> +        stripped=4
> +        hdr_len=38
> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-14-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
> +])
> +
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  
>  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
>  #
> diff --git a/tests/test-sflow.c b/tests/test-sflow.c
> index 6125d38..7d58ef3 100644
> --- a/tests/test-sflow.c
> +++ b/tests/test-sflow.c
> @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;
>  #define SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029
>  #define SFLOW_TAG_PKT_TUNNEL_VNI_IN 1030
>  #define SFLOW_TAG_PKT_MPLS 1006
> +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
>  
>  /* string sizes */
>  #define SFL_MAX_PORTNAME_LEN 255
> @@ -116,6 +117,7 @@ struct sflow_xdr {
>  	uint32_t TUNNEL_VNI_OUT;
>  	uint32_t TUNNEL_VNI_IN;
>  	uint32_t MPLS;
> +        uint32_t EXT_VLAN;
>  	uint32_t IFCOUNTERS;
>  	uint32_t ETHCOUNTERS;
>  	uint32_t LACPCOUNTERS;
> @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
>              }
>          }
>  
> +        if (x->offset.EXT_VLAN) {
> +            uint32_t stack_depth, ii;
> +            union flow_vlan_hdr vlan;
> +            sflowxdr_setc(x, x->offset.EXT_VLAN);
> +            /* print stripped VLAN tags stack */
> +            stack_depth = sflowxdr_next(x);
> +            for (ii = 0; ii < stack_depth; ii++) {
> +                vlan.qtag=sflowxdr_next_n(x);
> +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
> +                       ii, ntohs(vlan.tpid));
> +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
> +                       ii, vlan_tci_to_vid(vlan.tci));
> +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
> +                       ii, vlan_tci_to_pcp(vlan.tci));
> +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
> +                       ii, vlan_tci_to_cfi(vlan.tci));
> +            }
> +        }
> +
>          if (x->offset.SWITCH) {
>              sflowxdr_setc(x, x->offset.SWITCH);
>              printf(" in_vlan=%"PRIu32, sflowxdr_next(x));
> @@ -634,6 +655,10 @@ process_datagram(struct sflow_xdr *x)
>                      sflowxdr_mark_unique(x, &x->offset.MPLS);
>                      break;
>  
> +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
> +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
> +                    break;
> +
>                      /* Add others here... */
>                  }
>  
> -- 
> 1.9.3
> 
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Neil McKee April 18, 2017, 5:16 p.m. UTC | #2
This patch seems like it may be reporting a VLAN stack that can
already be decoded from the (ingress-sampled) packet header?

The TODO I left in the code may have been misleading...  this
structure is intended for the case where the sampled packet-header
includes non-standard 802.1Q headers -- which then have to be stripped
out of the sampled header to make it decodable,  or where the 802.1Q
headers have already been stripped out of the packet before you
sampled it (e.g. by hardware).

 Here is the wording from the sFlow spec at
http://sflow.org/sflow_version_5.txt:

/* Extended VLAN tunnel information
   Record outer VLAN encapsulations that have
   been stripped. extended_vlantunnel information
   should only be reported if all the following conditions are satisfied:
     1. The packet has nested vlan tags, AND
     2. The reporting device is VLAN aware, AND
     3. One or more VLAN tags have been stripped, either
        because they represent proprietary encapsulations, or
        because switch hardware automatically strips the outer VLAN
        encapsulation.
   Reporting extended_vlantunnel information is not a substitute for
   reporting extended_switch information. extended_switch data must
   always be reported to describe the ingress/egress VLAN information
   for the packet. The extended_vlantunnel information only applies to
   nested VLAN tags, and then only when one or more tags has been
   stripped. */
/* opaque = flow_data; enterprise = 0; format = 1012 */
extended_vlantunnel {
  unsigned int stack<>;  /* List of stripped 802.1Q TPID/TCI layers. Each
                            TPID,TCI pair is represented as a single 32 bit
                            integer. Layers listed from outermost to
                            innermost. */
}

My impression is that non-standard 802.1Q headers are now rare
(particularly where OVS is concerned).  i.e. everyone uses the
standard ethernet types 0x8100, 0x9100 etc.   So it's unlikely that
the OVS sFlow agent will need to doctor the sampled header to remove
non-standard 802.1Q headers.

[Quite separately,  if the OVS flow actions included instructions to
*push* vlans onto the VLAN stack before forwarding the packet,  then
it would make sense to report that in the sFlow export just as we do
when packets are pushed onto other kinds of tunnel (MPLS, VXLAN etc.).
  Unfortunately there doesn't seem to be an sFlow structure that
represents this today (e.g. in http://sflow.org/sflow_tunnels.txt)]
------
Neil McKee
InMon Corp.
http://www.inmon.com


On Tue, Apr 18, 2017 at 8:24 AM, Eric Garver <e@erig.me> wrote:
> Hi Lukasz,
>
> Thanks for expanding the 802.1ad support.
> I'm not familiar with sFlow, so I'll provide what feedback I can.
>
> On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
>> This patch implements QinQ related sFlow counters.
>> It is implemented according to sFlow Version 5,
>> http://www.sflow.org/sflow_version_5.txt
>> Open vSwitch will send a stack of stripped VLAN tags
>> to sFlow collector if the original packets have multiple
>> VLAN tags.
>>
>> Unit tests have been updated accordingly.
>>
>> The patch is based on commit f0fb825a37 (Add support
>> for 802.1ad (QinQ tunneling))
>>
>> Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
>> CC: Thomas F Herbert <thomasfherbert@gmail.com>
>> CC: Xiao Liang <shaw.leon@gmail.com>
>> CC: Eric Garver <e@erig.me>
>> CC: Neil McKee <neil.mckee@inmon.com>
>> ---
>>  lib/packets.h                |  4 +++
>>  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---
>>  ofproto/ofproto-dpif-sflow.h |  9 +++++
>>  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
>>  tests/test-sflow.c           | 25 +++++++++++++
>>  5 files changed, 177 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/packets.h b/lib/packets.h
>> index 755f08d..7555dfc 100644
>> --- a/lib/packets.h
>> +++ b/lib/packets.h
>> @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
>>          eth_type == htons(ETH_TYPE_VLAN_8021AD);
>>  }
>>
>> +static inline bool eth_type_8021Q(ovs_be16 eth_type)
>> +{
>> +    return eth_type == htons(ETH_TYPE_VLAN_8021Q);
>> +}
>>
>>  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
>>   * lengths. */
>> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
>> index 59fafa1..41972e2 100644
>> --- a/ofproto/ofproto-dpif-sflow.c
>> +++ b/ofproto/ofproto-dpif-sflow.c
>> @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
>>      }
>>  }
>>
>> +static void
>> +dpif_sflow_pop_vlan(const struct flow *flow,
>> +                    struct dpif_sflow_actions *sflow_actions)
>> +{
>> +    union flow_vlan_hdr vlan = flow->vlans[0];
>> +    if (eth_type_vlan(vlan.tpid)) {
>> +        int depth = 0;
>> +        int ii;
>> +        /* Calculate depth by detecting 8021Q TPID. */
>> +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
>> +            vlan = flow->vlans[ii];
>> +            depth++;
>> +            if (eth_type_8021Q(vlan.tpid)) {
>> +                break;
>> +            }
>
> This seems to be using 0x8100 as a marker to stop. Does this mean double
> stacked 0x8100 will not work?
>
> flow_count_vlan_headers() can be used to find the number of tags.
>
>> +        }
>> +
>> +        if (depth > 1) {
>> +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
>> +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
>> +            sflow_actions->vlan_stack_depth++;
>> +        }
>
> AFAICS, this only adds the outermost tag to the stack. Is that
> intentional? It's unclear to me if sFlow expects one or two VLANs to be
> in the stack.
>
>> +    }
>> +}
>> +
>>  void
>>  dpif_sflow_read_actions(const struct flow *flow,
>>                       const struct nlattr *actions, size_t actions_len,
>> @@ -1170,11 +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
>>           break;
>>
>>       case OVS_ACTION_ATTR_PUSH_VLAN:
>> +            break;
>> +
>>       case OVS_ACTION_ATTR_POP_VLAN:
>> -         /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
>> -          * construct a VLAN-stack. The sFlow user-action cookie already
>> -          * captures the egress VLAN ID so there is nothing more to do here.
>> -          */
>> +            dpif_sflow_pop_vlan(flow, sflow_actions);
>>           break;
>>
>>       case OVS_ACTION_ATTR_PUSH_MPLS: {
>> @@ -1225,6 +1249,19 @@ dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
>>      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
>>  }
>>
>> +static void
>> +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
>> +                             uint32_t *vlans_buf,
>> +                             const struct dpif_sflow_actions *sflow_actions)
>> +{
>> +    int ii;
>> +    stack->depth = sflow_actions->vlan_stack_depth;
>> +    stack->stack = vlans_buf;
>> +    for (ii = 0; ii < stack->depth; ii++) {
>> +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
>> +    }
>> +}
>> +
>>  /* Extract the output port count from the user action cookie.
>>   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
>>   */
>> @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>>      SFLFlow_sample_element vniInElem, vniOutElem;
>>      SFLFlow_sample_element mplsElem;
>>      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
>> +    SFLFlow_sample_element vlanTunnelElem;
>> +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
>>      SFLSampler *sampler;
>>      struct dpif_sflow_port *in_dsp;
>>      struct dpif_sflow_port *out_dsp;
>> @@ -1372,6 +1411,19 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>>       SFLADD_ELEMENT(&fs, &mplsElem);
>>      }
>>
>> +    /* stripped VLAN tags stack. */
>> +    if (sflow_actions
>> +        && sflow_actions->vlan_stack_depth > 0
>> +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
>> +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
>> +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
>> +        dpif_sflow_encode_vlan_stack(
>> +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
>> +                                 vlans_buf,
>> +                                 sflow_actions);
>> +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
>> +    }
>> +
>>      /* Submit the flow sample to be encoded into the next datagram. */
>>      SFLADD_ELEMENT(&fs, &hdrElem);
>>      SFLADD_ELEMENT(&fs, &switchElem);
>> diff --git a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h
>> index 014e6cc..cfd5492 100644
>> --- a/ofproto/ofproto-dpif-sflow.h
>> +++ b/ofproto/ofproto-dpif-sflow.h
>> @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
>>      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
>>      uint32_t mpls_stack_depth;               /* Out stack depth. */
>>      bool mpls_err;                           /* MPLS actions parse failure. */
>> +
>> +    /* Using host-byte order for the vlan stack here
>> +       to match the expectations of the sFlow library.
>> +       List of stripped 802.1Q TPID/TCI layers. Each
>> +       TPID,TCI pair is represented as a single 32 bit
>> +       integer. Layers listed from outermost to innermost.
>> +    */
>> +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
>> +    uint32_t vlan_stack_depth;                 /* Stack depth. */
>>  };
>>
>>  struct dpif_sflow *dpif_sflow_create(void);
>> diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
>> index 0c2ea38..f096175 100644
>> --- a/tests/ofproto-dpif.at
>> +++ b/tests/ofproto-dpif.at
>> @@ -6333,6 +6333,89 @@ HEADER
>>  OVS_VSWITCHD_STOP
>>  AT_CLEANUP
>>
>> +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN tunnel])
>> +AT_XFAIL_IF([test "$IS_WIN32" = "yes"])
>> +OVS_VSWITCHD_START
>> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
>> +add_of_ports br0 1 2
>> +AT_DATA([flows.txt], [dnl
>> +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
>> +])
>> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
>> +
>> +dnl set up sFlow logging
>> +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
>> +AT_CAPTURE_FILE([sflow.log])
>> +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT])
>> +ovs-appctl time/stop
>> +
>> +dnl configure sflow
>> +ovs-vsctl \
>> +   set Bridge br0 sflow=@sf -- \
>> +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
>> +     header=128 sampling=1 polling=0
>> +
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
>> +
>> +dnl sleep long enough to get the sFlow datagram flushed out (may be delayed for up to 1 second)
>> +for i in `seq 1 30`; do
>> +    ovs-appctl time/warp 100
>> +done
>> +
>> +OVS_APP_EXIT_AND_WAIT([test-sflow])
>> +
>> +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
>> +        /g']], [0], [dnl
>> +HEADER
>> +        dgramSeqNo=1
>> +        ds=127.0.0.1>2:1000
>> +        fsSeqNo=1
>> +        ext_vlan_tpid_0=0x88a8
>> +        ext_vlan_vid_0=150
>> +        ext_vlan_pcp_0=0
>> +        ext_vlan_cfi_0=1
>> +        in_vlan=150
>> +        in_priority=0
>> +        out_vlan=0
>> +        out_priority=0
>> +        meanSkip=1
>> +        samplePool=1
>> +        dropEvents=0
>> +        in_ifindex=0
>> +        in_format=0
>> +        out_ifindex=1
>> +        out_format=2
>> +        hdr_prot=1
>> +        pkt_len=22
>> +        stripped=4
>> +        hdr_len=18
>> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
>> +HEADER
>> +        dgramSeqNo=1
>> +        ds=127.0.0.1>2:1000
>> +        fsSeqNo=2
>> +        in_vlan=2000
>> +        in_priority=0
>> +        out_vlan=0
>> +        out_priority=0
>> +        meanSkip=1
>> +        samplePool=2
>> +        dropEvents=0
>> +        in_ifindex=0
>> +        in_format=0
>> +        out_ifindex=1
>> +        out_format=2
>> +        hdr_prot=1
>> +        pkt_len=42
>> +        stripped=4
>> +        hdr_len=38
>> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-14-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
>> +])
>> +
>> +OVS_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>>
>>  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
>>  #
>> diff --git a/tests/test-sflow.c b/tests/test-sflow.c
>> index 6125d38..7d58ef3 100644
>> --- a/tests/test-sflow.c
>> +++ b/tests/test-sflow.c
>> @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;
>>  #define SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029
>>  #define SFLOW_TAG_PKT_TUNNEL_VNI_IN 1030
>>  #define SFLOW_TAG_PKT_MPLS 1006
>> +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
>>
>>  /* string sizes */
>>  #define SFL_MAX_PORTNAME_LEN 255
>> @@ -116,6 +117,7 @@ struct sflow_xdr {
>>       uint32_t TUNNEL_VNI_OUT;
>>       uint32_t TUNNEL_VNI_IN;
>>       uint32_t MPLS;
>> +        uint32_t EXT_VLAN;
>>       uint32_t IFCOUNTERS;
>>       uint32_t ETHCOUNTERS;
>>       uint32_t LACPCOUNTERS;
>> @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
>>              }
>>          }
>>
>> +        if (x->offset.EXT_VLAN) {
>> +            uint32_t stack_depth, ii;
>> +            union flow_vlan_hdr vlan;
>> +            sflowxdr_setc(x, x->offset.EXT_VLAN);
>> +            /* print stripped VLAN tags stack */
>> +            stack_depth = sflowxdr_next(x);
>> +            for (ii = 0; ii < stack_depth; ii++) {
>> +                vlan.qtag=sflowxdr_next_n(x);
>> +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
>> +                       ii, ntohs(vlan.tpid));
>> +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
>> +                       ii, vlan_tci_to_vid(vlan.tci));
>> +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
>> +                       ii, vlan_tci_to_pcp(vlan.tci));
>> +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
>> +                       ii, vlan_tci_to_cfi(vlan.tci));
>> +            }
>> +        }
>> +
>>          if (x->offset.SWITCH) {
>>              sflowxdr_setc(x, x->offset.SWITCH);
>>              printf(" in_vlan=%"PRIu32, sflowxdr_next(x));
>> @@ -634,6 +655,10 @@ process_datagram(struct sflow_xdr *x)
>>                      sflowxdr_mark_unique(x, &x->offset.MPLS);
>>                      break;
>>
>> +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
>> +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
>> +                    break;
>> +
>>                      /* Add others here... */
>>                  }
>>
>> --
>> 1.9.3
>>
>> --------------------------------------------------------------
>> Intel Research and Development Ireland Limited
>> Registered in Ireland
>> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
>> Registered Number: 308263
>>
>>
>> This e-mail and any attachments may contain confidential material for the sole
>> use of the intended recipient(s). Any review or distribution by others is
>> strictly prohibited. If you are not the intended recipient, please contact the
>> sender and delete all copies.
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Lukasz Rzasik April 19, 2017, 2:03 p.m. UTC | #3
Hi Eric,

Thank you for looking at the patch.

I put my comments inline with a tag [Lukasz].

-----Original Message-----
From: Eric Garver [mailto:e@erig.me] 
Sent: Tuesday, April 18, 2017 5:25 PM
To: Rzasik, LukaszX <lukaszx.rzasik@intel.com>
Cc: dev@openvswitch.org; Thomas F Herbert <thomasfherbert@gmail.com>
Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow counters

Hi Lukasz,

Thanks for expanding the 802.1ad support.
I'm not familiar with sFlow, so I'll provide what feedback I can.

On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
> This patch implements QinQ related sFlow counters.
> It is implemented according to sFlow Version 5, 
> http://www.sflow.org/sflow_version_5.txt
> Open vSwitch will send a stack of stripped VLAN tags to sFlow 
> collector if the original packets have multiple VLAN tags.
> 
> Unit tests have been updated accordingly.
> 
> The patch is based on commit f0fb825a37 (Add support for 802.1ad (QinQ 
> tunneling))
> 
> Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
> CC: Thomas F Herbert <thomasfherbert@gmail.com>
> CC: Xiao Liang <shaw.leon@gmail.com>
> CC: Eric Garver <e@erig.me>
> CC: Neil McKee <neil.mckee@inmon.com>
> ---
>  lib/packets.h                |  4 +++
>  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---  
> ofproto/ofproto-dpif-sflow.h |  9 +++++
>  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
>  tests/test-sflow.c           | 25 +++++++++++++
>  5 files changed, 177 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/packets.h b/lib/packets.h index 755f08d..7555dfc 
> 100644
> --- a/lib/packets.h
> +++ b/lib/packets.h
> @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
>          eth_type == htons(ETH_TYPE_VLAN_8021AD);  }
>  
> +static inline bool eth_type_8021Q(ovs_be16 eth_type) {
> +    return eth_type == htons(ETH_TYPE_VLAN_8021Q); }
>  
>  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
>   * lengths. */
> diff --git a/ofproto/ofproto-dpif-sflow.c 
> b/ofproto/ofproto-dpif-sflow.c index 59fafa1..41972e2 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
>      }
>  }
>  
> +static void
> +dpif_sflow_pop_vlan(const struct flow *flow,
> +                    struct dpif_sflow_actions *sflow_actions) {
> +    union flow_vlan_hdr vlan = flow->vlans[0];
> +    if (eth_type_vlan(vlan.tpid)) {
> +        int depth = 0;
> +        int ii;
> +        /* Calculate depth by detecting 8021Q TPID. */
> +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
> +            vlan = flow->vlans[ii];
> +            depth++;
> +            if (eth_type_8021Q(vlan.tpid)) {
> +                break;
> +            }

This seems to be using 0x8100 as a marker to stop. Does this mean double stacked 0x8100 will not work?

flow_count_vlan_headers() can be used to find the number of tags.

[Lukasz] Yes, you are right. I'm using 0x8100 to detect the last VLAN tag and double stacked 0x8100 will not work. As I understand this is incorrect. From looking at flow_count_vlan_headers() it detects the last VLAN tag by checking CFI bit. If it is set to zero than it is the last tag.
I can change the implementation but I could not find any standard or document mentioning such use of that bit. Is it used in this way only internally in OVS or am I missing something? 

> +        }
> +
> +        if (depth > 1) {
> +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
> +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
> +            sflow_actions->vlan_stack_depth++;
> +        }

AFAICS, this only adds the outermost tag to the stack. Is that intentional? It's unclear to me if sFlow expects one or two VLANs to be in the stack.

[Lukasz] It should add more tags if there are multiple OVS_ACTION_ATTR_POP_VLAN actions. sFlow says that only stripped VLAN tags should be reported in the stack.

> +    }
> +}
> +
>  void
>  dpif_sflow_read_actions(const struct flow *flow,
>  			const struct nlattr *actions, size_t actions_len, @@ -1170,11 
> +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
>  	    break;
>  
>  	case OVS_ACTION_ATTR_PUSH_VLAN:
> +            break;
> +
>  	case OVS_ACTION_ATTR_POP_VLAN:
> -	    /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
> -	     * construct a VLAN-stack. The sFlow user-action cookie already
> -	     * captures the egress VLAN ID so there is nothing more to do here.
> -	     */
> +            dpif_sflow_pop_vlan(flow, sflow_actions);
>  	    break;
>  
>  	case OVS_ACTION_ATTR_PUSH_MPLS: {
> @@ -1225,6 +1249,19 @@ dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
>      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;  }
>  
> +static void
> +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
> +                             uint32_t *vlans_buf,
> +                             const struct dpif_sflow_actions 
> +*sflow_actions) {
> +    int ii;
> +    stack->depth = sflow_actions->vlan_stack_depth;
> +    stack->stack = vlans_buf;
> +    for (ii = 0; ii < stack->depth; ii++) {
> +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
> +    }
> +}
> +
>  /* Extract the output port count from the user action cookie.
>   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
>   */
> @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>      SFLFlow_sample_element vniInElem, vniOutElem;
>      SFLFlow_sample_element mplsElem;
>      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
> +    SFLFlow_sample_element vlanTunnelElem;
> +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
>      SFLSampler *sampler;
>      struct dpif_sflow_port *in_dsp;
>      struct dpif_sflow_port *out_dsp;
> @@ -1372,6 +1411,19 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>  	SFLADD_ELEMENT(&fs, &mplsElem);
>      }
>  
> +    /* stripped VLAN tags stack. */
> +    if (sflow_actions
> +        && sflow_actions->vlan_stack_depth > 0
> +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
> +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
> +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
> +        dpif_sflow_encode_vlan_stack(
> +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
> +                                 vlans_buf,
> +                                 sflow_actions);
> +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
> +    }
> +
>      /* Submit the flow sample to be encoded into the next datagram. */
>      SFLADD_ELEMENT(&fs, &hdrElem);
>      SFLADD_ELEMENT(&fs, &switchElem); diff --git 
> a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h index 
> 014e6cc..cfd5492 100644
> --- a/ofproto/ofproto-dpif-sflow.h
> +++ b/ofproto/ofproto-dpif-sflow.h
> @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
>      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
>      uint32_t mpls_stack_depth;               /* Out stack depth. */
>      bool mpls_err;                           /* MPLS actions parse failure. */
> +
> +    /* Using host-byte order for the vlan stack here
> +       to match the expectations of the sFlow library.
> +       List of stripped 802.1Q TPID/TCI layers. Each
> +       TPID,TCI pair is represented as a single 32 bit
> +       integer. Layers listed from outermost to innermost.
> +    */
> +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
> +    uint32_t vlan_stack_depth;                 /* Stack depth. */
>  };
>  
>  struct dpif_sflow *dpif_sflow_create(void); diff --git 
> a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 0c2ea38..f096175 
> 100644
> --- a/tests/ofproto-dpif.at
> +++ b/tests/ofproto-dpif.at
> @@ -6333,6 +6333,89 @@ HEADER
>  OVS_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN 
> +tunnel]) AT_XFAIL_IF([test "$IS_WIN32" = "yes"]) OVS_VSWITCHD_START 
> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) add_of_ports 
> +br0 1 2 AT_DATA([flows.txt], [dnl
> +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
> +])
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +dnl set up sFlow logging
> +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir --pidfile 
> +0:127.0.0.1 > sflow.log], [0], [], [ignore])
> +AT_CAPTURE_FILE([sflow.log])
> +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT]) ovs-appctl 
> +time/stop
> +
> +dnl configure sflow
> +ovs-vsctl \
> +   set Bridge br0 sflow=@sf -- \
> +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
> +     header=128 sampling=1 polling=0
> +
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type
> +(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=20
> +00,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type
> +(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
> +
> +dnl sleep long enough to get the sFlow datagram flushed out (may be 
> +delayed for up to 1 second) for i in `seq 1 30`; do
> +    ovs-appctl time/warp 100
> +done
> +
> +OVS_APP_EXIT_AND_WAIT([test-sflow])
> +
> +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
> +        /g']], [0], [dnl
> +HEADER
> +        dgramSeqNo=1
> +        ds=127.0.0.1>2:1000
> +        fsSeqNo=1
> +        ext_vlan_tpid_0=0x88a8
> +        ext_vlan_vid_0=150
> +        ext_vlan_pcp_0=0
> +        ext_vlan_cfi_0=1
> +        in_vlan=150
> +        in_priority=0
> +        out_vlan=0
> +        out_priority=0
> +        meanSkip=1
> +        samplePool=1
> +        dropEvents=0
> +        in_ifindex=0
> +        in_format=0
> +        out_ifindex=1
> +        out_format=2
> +        hdr_prot=1
> +        pkt_len=22
> +        stripped=4
> +        hdr_len=18
> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
> +HEADER
> +        dgramSeqNo=1
> +        ds=127.0.0.1>2:1000
> +        fsSeqNo=2
> +        in_vlan=2000
> +        in_priority=0
> +        out_vlan=0
> +        out_priority=0
> +        meanSkip=1
> +        samplePool=2
> +        dropEvents=0
> +        in_ifindex=0
> +        in_format=0
> +        out_ifindex=1
> +        out_format=2
> +        hdr_prot=1
> +        pkt_len=42
> +        stripped=4
> +        hdr_len=38
> +        
> +hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-14
> +-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
> +])
> +
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  
>  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
>  #
> diff --git a/tests/test-sflow.c b/tests/test-sflow.c index 
> 6125d38..7d58ef3 100644
> --- a/tests/test-sflow.c
> +++ b/tests/test-sflow.c
> @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;  #define 
> SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029  #define SFLOW_TAG_PKT_TUNNEL_VNI_IN 
> 1030  #define SFLOW_TAG_PKT_MPLS 1006
> +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
>  
>  /* string sizes */
>  #define SFL_MAX_PORTNAME_LEN 255
> @@ -116,6 +117,7 @@ struct sflow_xdr {
>  	uint32_t TUNNEL_VNI_OUT;
>  	uint32_t TUNNEL_VNI_IN;
>  	uint32_t MPLS;
> +        uint32_t EXT_VLAN;
>  	uint32_t IFCOUNTERS;
>  	uint32_t ETHCOUNTERS;
>  	uint32_t LACPCOUNTERS;
> @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
>              }
>          }
>  
> +        if (x->offset.EXT_VLAN) {
> +            uint32_t stack_depth, ii;
> +            union flow_vlan_hdr vlan;
> +            sflowxdr_setc(x, x->offset.EXT_VLAN);
> +            /* print stripped VLAN tags stack */
> +            stack_depth = sflowxdr_next(x);
> +            for (ii = 0; ii < stack_depth; ii++) {
> +                vlan.qtag=sflowxdr_next_n(x);
> +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
> +                       ii, ntohs(vlan.tpid));
> +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
> +                       ii, vlan_tci_to_vid(vlan.tci));
> +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
> +                       ii, vlan_tci_to_pcp(vlan.tci));
> +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
> +                       ii, vlan_tci_to_cfi(vlan.tci));
> +            }
> +        }
> +
>          if (x->offset.SWITCH) {
>              sflowxdr_setc(x, x->offset.SWITCH);
>              printf(" in_vlan=%"PRIu32, sflowxdr_next(x)); @@ -634,6 
> +655,10 @@ process_datagram(struct sflow_xdr *x)
>                      sflowxdr_mark_unique(x, &x->offset.MPLS);
>                      break;
>  
> +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
> +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
> +                    break;
> +
>                      /* Add others here... */
>                  }
>  
> --
> 1.9.3
> 
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited Registered in Ireland 
> Registered Office: Collinstown Industrial Park, Leixlip, County 
> Kildare Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for 
> the sole use of the intended recipient(s). Any review or distribution 
> by others is strictly prohibited. If you are not the intended 
> recipient, please contact the sender and delete all copies.
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
Eric Garver April 19, 2017, 2:52 p.m. UTC | #4
On Wed, Apr 19, 2017 at 02:03:15PM +0000, Rzasik, LukaszX wrote:
> Hi Eric,
> 
> Thank you for looking at the patch.
> 
> I put my comments inline with a tag [Lukasz].
> 
> -----Original Message-----
> From: Eric Garver [mailto:e@erig.me] 
> Sent: Tuesday, April 18, 2017 5:25 PM
> To: Rzasik, LukaszX <lukaszx.rzasik@intel.com>
> Cc: dev@openvswitch.org; Thomas F Herbert <thomasfherbert@gmail.com>
> Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow counters
> 
> Hi Lukasz,
> 
> Thanks for expanding the 802.1ad support.
> I'm not familiar with sFlow, so I'll provide what feedback I can.
> 
> On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
> > This patch implements QinQ related sFlow counters.
> > It is implemented according to sFlow Version 5, 
> > http://www.sflow.org/sflow_version_5.txt
> > Open vSwitch will send a stack of stripped VLAN tags to sFlow 
> > collector if the original packets have multiple VLAN tags.
> > 
> > Unit tests have been updated accordingly.
> > 
> > The patch is based on commit f0fb825a37 (Add support for 802.1ad (QinQ 
> > tunneling))
> > 
> > Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
> > CC: Thomas F Herbert <thomasfherbert@gmail.com>
> > CC: Xiao Liang <shaw.leon@gmail.com>
> > CC: Eric Garver <e@erig.me>
> > CC: Neil McKee <neil.mckee@inmon.com>
> > ---
> >  lib/packets.h                |  4 +++
> >  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---  
> > ofproto/ofproto-dpif-sflow.h |  9 +++++
> >  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
> >  tests/test-sflow.c           | 25 +++++++++++++
> >  5 files changed, 177 insertions(+), 4 deletions(-)
> > 
> > diff --git a/lib/packets.h b/lib/packets.h index 755f08d..7555dfc 
> > 100644
> > --- a/lib/packets.h
> > +++ b/lib/packets.h
> > @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
> >          eth_type == htons(ETH_TYPE_VLAN_8021AD);  }
> >  
> > +static inline bool eth_type_8021Q(ovs_be16 eth_type) {
> > +    return eth_type == htons(ETH_TYPE_VLAN_8021Q); }
> >  
> >  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
> >   * lengths. */
> > diff --git a/ofproto/ofproto-dpif-sflow.c 
> > b/ofproto/ofproto-dpif-sflow.c index 59fafa1..41972e2 100644
> > --- a/ofproto/ofproto-dpif-sflow.c
> > +++ b/ofproto/ofproto-dpif-sflow.c
> > @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
> >      }
> >  }
> >  
> > +static void
> > +dpif_sflow_pop_vlan(const struct flow *flow,
> > +                    struct dpif_sflow_actions *sflow_actions) {
> > +    union flow_vlan_hdr vlan = flow->vlans[0];
> > +    if (eth_type_vlan(vlan.tpid)) {
> > +        int depth = 0;
> > +        int ii;
> > +        /* Calculate depth by detecting 8021Q TPID. */
> > +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
> > +            vlan = flow->vlans[ii];
> > +            depth++;
> > +            if (eth_type_8021Q(vlan.tpid)) {
> > +                break;
> > +            }
> 
> This seems to be using 0x8100 as a marker to stop. Does this mean double stacked 0x8100 will not work?
> 
> flow_count_vlan_headers() can be used to find the number of tags.
> 
> [Lukasz] Yes, you are right. I'm using 0x8100 to detect the last VLAN tag and double stacked 0x8100 will not work. As I understand this is incorrect. From looking at flow_count_vlan_headers() it detects the last VLAN tag by checking CFI bit. If it is set to zero than it is the last tag.

Using the CFI bit is kind of historical. Before 802.1ad the TPID was not
tracked at all. So the CFI bit was used as a marker to mean "VLAN tag
present" allowing a VID of 0.

> I can change the implementation but I could not find any standard or document mentioning such use of that bit. Is it used in this way only internally in OVS or am I missing something? 

You should be able to key off either the TPID or the CFI bit. Various
points of the code check one or the other. I think what you have is okay
- if not I'd consider it a bug in the flow parsing code.

> > +        }
> > +
> > +        if (depth > 1) {
> > +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
> > +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
> > +            sflow_actions->vlan_stack_depth++;
> > +        }
> 
> AFAICS, this only adds the outermost tag to the stack. Is that intentional? It's unclear to me if sFlow expects one or two VLANs to be in the stack.
> 
> [Lukasz] It should add more tags if there are multiple OVS_ACTION_ATTR_POP_VLAN actions. sFlow says that only stripped VLAN tags should be reported in the stack.

Okay. That makes sense to me. But please take Neil's email under
advisement.

> 
> > +    }
> > +}
> > +
> >  void
> >  dpif_sflow_read_actions(const struct flow *flow,
> >  			const struct nlattr *actions, size_t actions_len, @@ -1170,11 
> > +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
> >  	    break;
> >  
> >  	case OVS_ACTION_ATTR_PUSH_VLAN:
> > +            break;
> > +
> >  	case OVS_ACTION_ATTR_POP_VLAN:
> > -	    /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
> > -	     * construct a VLAN-stack. The sFlow user-action cookie already
> > -	     * captures the egress VLAN ID so there is nothing more to do here.
> > -	     */
> > +            dpif_sflow_pop_vlan(flow, sflow_actions);
> >  	    break;
> >  
> >  	case OVS_ACTION_ATTR_PUSH_MPLS: {
> > @@ -1225,6 +1249,19 @@ dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
> >      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;  }
> >  
> > +static void
> > +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
> > +                             uint32_t *vlans_buf,
> > +                             const struct dpif_sflow_actions 
> > +*sflow_actions) {
> > +    int ii;
> > +    stack->depth = sflow_actions->vlan_stack_depth;
> > +    stack->stack = vlans_buf;
> > +    for (ii = 0; ii < stack->depth; ii++) {
> > +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
> > +    }
> > +}
> > +
> >  /* Extract the output port count from the user action cookie.
> >   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
> >   */
> > @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> >      SFLFlow_sample_element vniInElem, vniOutElem;
> >      SFLFlow_sample_element mplsElem;
> >      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
> > +    SFLFlow_sample_element vlanTunnelElem;
> > +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
> >      SFLSampler *sampler;
> >      struct dpif_sflow_port *in_dsp;
> >      struct dpif_sflow_port *out_dsp;
> > @@ -1372,6 +1411,19 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> >  	SFLADD_ELEMENT(&fs, &mplsElem);
> >      }
> >  
> > +    /* stripped VLAN tags stack. */
> > +    if (sflow_actions
> > +        && sflow_actions->vlan_stack_depth > 0
> > +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
> > +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
> > +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
> > +        dpif_sflow_encode_vlan_stack(
> > +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
> > +                                 vlans_buf,
> > +                                 sflow_actions);
> > +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
> > +    }
> > +
> >      /* Submit the flow sample to be encoded into the next datagram. */
> >      SFLADD_ELEMENT(&fs, &hdrElem);
> >      SFLADD_ELEMENT(&fs, &switchElem); diff --git 
> > a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h index 
> > 014e6cc..cfd5492 100644
> > --- a/ofproto/ofproto-dpif-sflow.h
> > +++ b/ofproto/ofproto-dpif-sflow.h
> > @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
> >      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
> >      uint32_t mpls_stack_depth;               /* Out stack depth. */
> >      bool mpls_err;                           /* MPLS actions parse failure. */
> > +
> > +    /* Using host-byte order for the vlan stack here
> > +       to match the expectations of the sFlow library.
> > +       List of stripped 802.1Q TPID/TCI layers. Each
> > +       TPID,TCI pair is represented as a single 32 bit
> > +       integer. Layers listed from outermost to innermost.
> > +    */
> > +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
> > +    uint32_t vlan_stack_depth;                 /* Stack depth. */
> >  };
> >  
> >  struct dpif_sflow *dpif_sflow_create(void); diff --git 
> > a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 0c2ea38..f096175 
> > 100644
> > --- a/tests/ofproto-dpif.at
> > +++ b/tests/ofproto-dpif.at
> > @@ -6333,6 +6333,89 @@ HEADER
> >  OVS_VSWITCHD_STOP
> >  AT_CLEANUP
> >  
> > +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN 
> > +tunnel]) AT_XFAIL_IF([test "$IS_WIN32" = "yes"]) OVS_VSWITCHD_START 
> > +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) add_of_ports 
> > +br0 1 2 AT_DATA([flows.txt], [dnl
> > +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
> > +])
> > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> > +
> > +dnl set up sFlow logging
> > +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir --pidfile 
> > +0:127.0.0.1 > sflow.log], [0], [], [ignore])
> > +AT_CAPTURE_FILE([sflow.log])
> > +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT]) ovs-appctl 
> > +time/stop
> > +
> > +dnl configure sflow
> > +ovs-vsctl \
> > +   set Bridge br0 sflow=@sf -- \
> > +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
> > +     header=128 sampling=1 polling=0
> > +
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
> > +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type
> > +(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=20
> > +00,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
> > +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type
> > +(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
> > +
> > +dnl sleep long enough to get the sFlow datagram flushed out (may be 
> > +delayed for up to 1 second) for i in `seq 1 30`; do
> > +    ovs-appctl time/warp 100
> > +done
> > +
> > +OVS_APP_EXIT_AND_WAIT([test-sflow])
> > +
> > +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
> > +        /g']], [0], [dnl
> > +HEADER
> > +        dgramSeqNo=1
> > +        ds=127.0.0.1>2:1000
> > +        fsSeqNo=1
> > +        ext_vlan_tpid_0=0x88a8
> > +        ext_vlan_vid_0=150
> > +        ext_vlan_pcp_0=0
> > +        ext_vlan_cfi_0=1
> > +        in_vlan=150
> > +        in_priority=0
> > +        out_vlan=0
> > +        out_priority=0
> > +        meanSkip=1
> > +        samplePool=1
> > +        dropEvents=0
> > +        in_ifindex=0
> > +        in_format=0
> > +        out_ifindex=1
> > +        out_format=2
> > +        hdr_prot=1
> > +        pkt_len=22
> > +        stripped=4
> > +        hdr_len=18
> > +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
> > +HEADER
> > +        dgramSeqNo=1
> > +        ds=127.0.0.1>2:1000
> > +        fsSeqNo=2
> > +        in_vlan=2000
> > +        in_priority=0
> > +        out_vlan=0
> > +        out_priority=0
> > +        meanSkip=1
> > +        samplePool=2
> > +        dropEvents=0
> > +        in_ifindex=0
> > +        in_format=0
> > +        out_ifindex=1
> > +        out_format=2
> > +        hdr_prot=1
> > +        pkt_len=42
> > +        stripped=4
> > +        hdr_len=38
> > +        
> > +hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-14
> > +-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
> > +])
> > +
> > +OVS_VSWITCHD_STOP
> > +AT_CLEANUP
> > +
> >  
> >  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
> >  #
> > diff --git a/tests/test-sflow.c b/tests/test-sflow.c index 
> > 6125d38..7d58ef3 100644
> > --- a/tests/test-sflow.c
> > +++ b/tests/test-sflow.c
> > @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;  #define 
> > SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029  #define SFLOW_TAG_PKT_TUNNEL_VNI_IN 
> > 1030  #define SFLOW_TAG_PKT_MPLS 1006
> > +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
> >  
> >  /* string sizes */
> >  #define SFL_MAX_PORTNAME_LEN 255
> > @@ -116,6 +117,7 @@ struct sflow_xdr {
> >  	uint32_t TUNNEL_VNI_OUT;
> >  	uint32_t TUNNEL_VNI_IN;
> >  	uint32_t MPLS;
> > +        uint32_t EXT_VLAN;
> >  	uint32_t IFCOUNTERS;
> >  	uint32_t ETHCOUNTERS;
> >  	uint32_t LACPCOUNTERS;
> > @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
> >              }
> >          }
> >  
> > +        if (x->offset.EXT_VLAN) {
> > +            uint32_t stack_depth, ii;
> > +            union flow_vlan_hdr vlan;
> > +            sflowxdr_setc(x, x->offset.EXT_VLAN);
> > +            /* print stripped VLAN tags stack */
> > +            stack_depth = sflowxdr_next(x);
> > +            for (ii = 0; ii < stack_depth; ii++) {
> > +                vlan.qtag=sflowxdr_next_n(x);
> > +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
> > +                       ii, ntohs(vlan.tpid));
> > +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
> > +                       ii, vlan_tci_to_vid(vlan.tci));
> > +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
> > +                       ii, vlan_tci_to_pcp(vlan.tci));
> > +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
> > +                       ii, vlan_tci_to_cfi(vlan.tci));
> > +            }
> > +        }
> > +
> >          if (x->offset.SWITCH) {
> >              sflowxdr_setc(x, x->offset.SWITCH);
> >              printf(" in_vlan=%"PRIu32, sflowxdr_next(x)); @@ -634,6 
> > +655,10 @@ process_datagram(struct sflow_xdr *x)
> >                      sflowxdr_mark_unique(x, &x->offset.MPLS);
> >                      break;
> >  
> > +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
> > +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
> > +                    break;
> > +
> >                      /* Add others here... */
> >                  }
> >  
> > --
> > 1.9.3
> > 
> > --------------------------------------------------------------
> > Intel Research and Development Ireland Limited Registered in Ireland 
> > Registered Office: Collinstown Industrial Park, Leixlip, County 
> > Kildare Registered Number: 308263
> > 
> > 
> > This e-mail and any attachments may contain confidential material for 
> > the sole use of the intended recipient(s). Any review or distribution 
> > by others is strictly prohibited. If you are not the intended 
> > recipient, please contact the sender and delete all copies.
> > 
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
> 
> 
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Lukasz Rzasik April 20, 2017, 12:39 p.m. UTC | #5
Hi Neil,

Thanks for the comments. I'll try to explain what is my purpose and why I implemented it that way.

I'm aiming at implementing QinQ related sFlow counters and it seemed that extended_vlantunnel is the right place.
The only other place that I can find where VLAN tags can be reported is extended_switch but there is place only for a single src_vlan and dst_vlan. 

From the description of extended_vlantunnel it fits QinQ scenario ideally. I tried to keep my implementation as close as possible with the description. It does not report all VLAN tags. It reports only stripped VLAN tags when there are multiple tags in the packet. 

Also TODO that you left in the code made me confident that it is the right approach:
/* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
  * construct a VLAN-stack. The sFlow user-action cookie already
  * captures the egress VLAN ID so there is nothing more to do here.
  */ 

After your comment I looked for some more information about using extended_vlantunnel for reporting non-standard 802.1Q headers but I cannot find any information about that in the sFlow spec. 

As you mentioned, the spec states only these three conditions:
     1. The packet has nested vlan tags, AND
     2. The reporting device is VLAN aware, AND
     3. One or more VLAN tags have been stripped, either
        because they represent proprietary encapsulations, or
        because switch hardware automatically strips the outer VLAN
        encapsulation.

And I think they are satisfied in the implementation. 

Could you explain in more details why you think this is not the right place to report stripped VLAN tags in QinQ scenario?

The implementation does not consider 0x9100 ethernet type as I could not find it anywhere used in OVS code.
I could extend the implementation to take it into account as the alternative for 0x88a8. 

Thanks!

-----Original Message-----
From: Neil McKee [mailto:neil.mckee@inmon.com] 
Sent: Tuesday, April 18, 2017 7:17 PM
To: Eric Garver <e@erig.me>; Rzasik, LukaszX <lukaszx.rzasik@intel.com>; dev@openvswitch.org; Thomas F Herbert <thomasfherbert@gmail.com>
Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow counters

This patch seems like it may be reporting a VLAN stack that can already be decoded from the (ingress-sampled) packet header?

The TODO I left in the code may have been misleading...  this structure is intended for the case where the sampled packet-header includes non-standard 802.1Q headers -- which then have to be stripped out of the sampled header to make it decodable,  or where the 802.1Q headers have already been stripped out of the packet before you sampled it (e.g. by hardware).

 Here is the wording from the sFlow spec at
http://sflow.org/sflow_version_5.txt:

/* Extended VLAN tunnel information
   Record outer VLAN encapsulations that have
   been stripped. extended_vlantunnel information
   should only be reported if all the following conditions are satisfied:
     1. The packet has nested vlan tags, AND
     2. The reporting device is VLAN aware, AND
     3. One or more VLAN tags have been stripped, either
        because they represent proprietary encapsulations, or
        because switch hardware automatically strips the outer VLAN
        encapsulation.
   Reporting extended_vlantunnel information is not a substitute for
   reporting extended_switch information. extended_switch data must
   always be reported to describe the ingress/egress VLAN information
   for the packet. The extended_vlantunnel information only applies to
   nested VLAN tags, and then only when one or more tags has been
   stripped. */
/* opaque = flow_data; enterprise = 0; format = 1012 */ extended_vlantunnel {
  unsigned int stack<>;  /* List of stripped 802.1Q TPID/TCI layers. Each
                            TPID,TCI pair is represented as a single 32 bit
                            integer. Layers listed from outermost to
                            innermost. */ }

My impression is that non-standard 802.1Q headers are now rare (particularly where OVS is concerned).  i.e. everyone uses the
standard ethernet types 0x8100, 0x9100 etc.   So it's unlikely that
the OVS sFlow agent will need to doctor the sampled header to remove non-standard 802.1Q headers.

[Quite separately,  if the OVS flow actions included instructions to
*push* vlans onto the VLAN stack before forwarding the packet,  then it would make sense to report that in the sFlow export just as we do when packets are pushed onto other kinds of tunnel (MPLS, VXLAN etc.).
  Unfortunately there doesn't seem to be an sFlow structure that represents this today (e.g. in http://sflow.org/sflow_tunnels.txt)]
------
Neil McKee
InMon Corp.
http://www.inmon.com


On Tue, Apr 18, 2017 at 8:24 AM, Eric Garver <e@erig.me> wrote:
> Hi Lukasz,
>
> Thanks for expanding the 802.1ad support.
> I'm not familiar with sFlow, so I'll provide what feedback I can.
>
> On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
>> This patch implements QinQ related sFlow counters.
>> It is implemented according to sFlow Version 5, 
>> http://www.sflow.org/sflow_version_5.txt
>> Open vSwitch will send a stack of stripped VLAN tags to sFlow 
>> collector if the original packets have multiple VLAN tags.
>>
>> Unit tests have been updated accordingly.
>>
>> The patch is based on commit f0fb825a37 (Add support for 802.1ad 
>> (QinQ tunneling))
>>
>> Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
>> CC: Thomas F Herbert <thomasfherbert@gmail.com>
>> CC: Xiao Liang <shaw.leon@gmail.com>
>> CC: Eric Garver <e@erig.me>
>> CC: Neil McKee <neil.mckee@inmon.com>
>> ---
>>  lib/packets.h                |  4 +++
>>  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---  
>> ofproto/ofproto-dpif-sflow.h |  9 +++++
>>  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
>>  tests/test-sflow.c           | 25 +++++++++++++
>>  5 files changed, 177 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/packets.h b/lib/packets.h index 755f08d..7555dfc 
>> 100644
>> --- a/lib/packets.h
>> +++ b/lib/packets.h
>> @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
>>          eth_type == htons(ETH_TYPE_VLAN_8021AD);  }
>>
>> +static inline bool eth_type_8021Q(ovs_be16 eth_type) {
>> +    return eth_type == htons(ETH_TYPE_VLAN_8021Q); }
>>
>>  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
>>   * lengths. */
>> diff --git a/ofproto/ofproto-dpif-sflow.c 
>> b/ofproto/ofproto-dpif-sflow.c index 59fafa1..41972e2 100644
>> --- a/ofproto/ofproto-dpif-sflow.c
>> +++ b/ofproto/ofproto-dpif-sflow.c
>> @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
>>      }
>>  }
>>
>> +static void
>> +dpif_sflow_pop_vlan(const struct flow *flow,
>> +                    struct dpif_sflow_actions *sflow_actions) {
>> +    union flow_vlan_hdr vlan = flow->vlans[0];
>> +    if (eth_type_vlan(vlan.tpid)) {
>> +        int depth = 0;
>> +        int ii;
>> +        /* Calculate depth by detecting 8021Q TPID. */
>> +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
>> +            vlan = flow->vlans[ii];
>> +            depth++;
>> +            if (eth_type_8021Q(vlan.tpid)) {
>> +                break;
>> +            }
>
> This seems to be using 0x8100 as a marker to stop. Does this mean 
> double stacked 0x8100 will not work?
>
> flow_count_vlan_headers() can be used to find the number of tags.
>
>> +        }
>> +
>> +        if (depth > 1) {
>> +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
>> +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
>> +            sflow_actions->vlan_stack_depth++;
>> +        }
>
> AFAICS, this only adds the outermost tag to the stack. Is that 
> intentional? It's unclear to me if sFlow expects one or two VLANs to 
> be in the stack.
>
>> +    }
>> +}
>> +
>>  void
>>  dpif_sflow_read_actions(const struct flow *flow,
>>                       const struct nlattr *actions, size_t 
>> actions_len, @@ -1170,11 +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
>>           break;
>>
>>       case OVS_ACTION_ATTR_PUSH_VLAN:
>> +            break;
>> +
>>       case OVS_ACTION_ATTR_POP_VLAN:
>> -         /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
>> -          * construct a VLAN-stack. The sFlow user-action cookie already
>> -          * captures the egress VLAN ID so there is nothing more to do here.
>> -          */
>> +            dpif_sflow_pop_vlan(flow, sflow_actions);
>>           break;
>>
>>       case OVS_ACTION_ATTR_PUSH_MPLS: { @@ -1225,6 +1249,19 @@ 
>> dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
>>      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;  }
>>
>> +static void
>> +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
>> +                             uint32_t *vlans_buf,
>> +                             const struct dpif_sflow_actions 
>> +*sflow_actions) {
>> +    int ii;
>> +    stack->depth = sflow_actions->vlan_stack_depth;
>> +    stack->stack = vlans_buf;
>> +    for (ii = 0; ii < stack->depth; ii++) {
>> +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
>> +    }
>> +}
>> +
>>  /* Extract the output port count from the user action cookie.
>>   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
>>   */
>> @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>>      SFLFlow_sample_element vniInElem, vniOutElem;
>>      SFLFlow_sample_element mplsElem;
>>      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
>> +    SFLFlow_sample_element vlanTunnelElem;
>> +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
>>      SFLSampler *sampler;
>>      struct dpif_sflow_port *in_dsp;
>>      struct dpif_sflow_port *out_dsp; @@ -1372,6 +1411,19 @@ 
>> dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
>>       SFLADD_ELEMENT(&fs, &mplsElem);
>>      }
>>
>> +    /* stripped VLAN tags stack. */
>> +    if (sflow_actions
>> +        && sflow_actions->vlan_stack_depth > 0
>> +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
>> +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
>> +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
>> +        dpif_sflow_encode_vlan_stack(
>> +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
>> +                                 vlans_buf,
>> +                                 sflow_actions);
>> +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
>> +    }
>> +
>>      /* Submit the flow sample to be encoded into the next datagram. */
>>      SFLADD_ELEMENT(&fs, &hdrElem);
>>      SFLADD_ELEMENT(&fs, &switchElem); diff --git 
>> a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h index 
>> 014e6cc..cfd5492 100644
>> --- a/ofproto/ofproto-dpif-sflow.h
>> +++ b/ofproto/ofproto-dpif-sflow.h
>> @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
>>      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
>>      uint32_t mpls_stack_depth;               /* Out stack depth. */
>>      bool mpls_err;                           /* MPLS actions parse failure. */
>> +
>> +    /* Using host-byte order for the vlan stack here
>> +       to match the expectations of the sFlow library.
>> +       List of stripped 802.1Q TPID/TCI layers. Each
>> +       TPID,TCI pair is represented as a single 32 bit
>> +       integer. Layers listed from outermost to innermost.
>> +    */
>> +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
>> +    uint32_t vlan_stack_depth;                 /* Stack depth. */
>>  };
>>
>>  struct dpif_sflow *dpif_sflow_create(void); diff --git 
>> a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 
>> 0c2ea38..f096175 100644
>> --- a/tests/ofproto-dpif.at
>> +++ b/tests/ofproto-dpif.at
>> @@ -6333,6 +6333,89 @@ HEADER
>>  OVS_VSWITCHD_STOP
>>  AT_CLEANUP
>>
>> +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN 
>> +tunnel]) AT_XFAIL_IF([test "$IS_WIN32" = "yes"]) OVS_VSWITCHD_START 
>> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) 
>> +add_of_ports br0 1 2 AT_DATA([flows.txt], [dnl
>> +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
>> +])
>> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
>> +
>> +dnl set up sFlow logging
>> +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir 
>> +--pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
>> +AT_CAPTURE_FILE([sflow.log])
>> +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT]) ovs-appctl 
>> +time/stop
>> +
>> +dnl configure sflow
>> +ovs-vsctl \
>> +   set Bridge br0 sflow=@sf -- \
>> +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
>> +     header=128 sampling=1 polling=0
>> +
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
>> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_typ
>> +e(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=
>> +2000,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
>> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
>> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_typ
>> +e(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
>> +
>> +dnl sleep long enough to get the sFlow datagram flushed out (may be 
>> +delayed for up to 1 second) for i in `seq 1 30`; do
>> +    ovs-appctl time/warp 100
>> +done
>> +
>> +OVS_APP_EXIT_AND_WAIT([test-sflow])
>> +
>> +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
>> +        /g']], [0], [dnl
>> +HEADER
>> +        dgramSeqNo=1
>> +        ds=127.0.0.1>2:1000
>> +        fsSeqNo=1
>> +        ext_vlan_tpid_0=0x88a8
>> +        ext_vlan_vid_0=150
>> +        ext_vlan_pcp_0=0
>> +        ext_vlan_cfi_0=1
>> +        in_vlan=150
>> +        in_priority=0
>> +        out_vlan=0
>> +        out_priority=0
>> +        meanSkip=1
>> +        samplePool=1
>> +        dropEvents=0
>> +        in_ifindex=0
>> +        in_format=0
>> +        out_ifindex=1
>> +        out_format=2
>> +        hdr_prot=1
>> +        pkt_len=22
>> +        stripped=4
>> +        hdr_len=18
>> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
>> +HEADER
>> +        dgramSeqNo=1
>> +        ds=127.0.0.1>2:1000
>> +        fsSeqNo=2
>> +        in_vlan=2000
>> +        in_priority=0
>> +        out_vlan=0
>> +        out_priority=0
>> +        meanSkip=1
>> +        samplePool=2
>> +        dropEvents=0
>> +        in_ifindex=0
>> +        in_format=0
>> +        out_ifindex=1
>> +        out_format=2
>> +        hdr_prot=1
>> +        pkt_len=42
>> +        stripped=4
>> +        hdr_len=38
>> +        
>> +hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-1
>> +4-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
>> +])
>> +
>> +OVS_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>>
>>  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
>>  #
>> diff --git a/tests/test-sflow.c b/tests/test-sflow.c index 
>> 6125d38..7d58ef3 100644
>> --- a/tests/test-sflow.c
>> +++ b/tests/test-sflow.c
>> @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;  #define 
>> SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029  #define 
>> SFLOW_TAG_PKT_TUNNEL_VNI_IN 1030  #define SFLOW_TAG_PKT_MPLS 1006
>> +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
>>
>>  /* string sizes */
>>  #define SFL_MAX_PORTNAME_LEN 255
>> @@ -116,6 +117,7 @@ struct sflow_xdr {
>>       uint32_t TUNNEL_VNI_OUT;
>>       uint32_t TUNNEL_VNI_IN;
>>       uint32_t MPLS;
>> +        uint32_t EXT_VLAN;
>>       uint32_t IFCOUNTERS;
>>       uint32_t ETHCOUNTERS;
>>       uint32_t LACPCOUNTERS;
>> @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
>>              }
>>          }
>>
>> +        if (x->offset.EXT_VLAN) {
>> +            uint32_t stack_depth, ii;
>> +            union flow_vlan_hdr vlan;
>> +            sflowxdr_setc(x, x->offset.EXT_VLAN);
>> +            /* print stripped VLAN tags stack */
>> +            stack_depth = sflowxdr_next(x);
>> +            for (ii = 0; ii < stack_depth; ii++) {
>> +                vlan.qtag=sflowxdr_next_n(x);
>> +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
>> +                       ii, ntohs(vlan.tpid));
>> +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
>> +                       ii, vlan_tci_to_vid(vlan.tci));
>> +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
>> +                       ii, vlan_tci_to_pcp(vlan.tci));
>> +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
>> +                       ii, vlan_tci_to_cfi(vlan.tci));
>> +            }
>> +        }
>> +
>>          if (x->offset.SWITCH) {
>>              sflowxdr_setc(x, x->offset.SWITCH);
>>              printf(" in_vlan=%"PRIu32, sflowxdr_next(x)); @@ -634,6 
>> +655,10 @@ process_datagram(struct sflow_xdr *x)
>>                      sflowxdr_mark_unique(x, &x->offset.MPLS);
>>                      break;
>>
>> +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
>> +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
>> +                    break;
>> +
>>                      /* Add others here... */
>>                  }
>>
>> --
>> 1.9.3
>>
>> --------------------------------------------------------------
>> Intel Research and Development Ireland Limited Registered in Ireland 
>> Registered Office: Collinstown Industrial Park, Leixlip, County 
>> Kildare Registered Number: 308263
>>
>>
>> This e-mail and any attachments may contain confidential material for 
>> the sole use of the intended recipient(s). Any review or distribution 
>> by others is strictly prohibited. If you are not the intended 
>> recipient, please contact the sender and delete all copies.
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
Neil McKee April 20, 2017, 5:50 p.m. UTC | #6
The purpose of the extended_vlantunnel structure was to help describe
the packet at the point of sampling, not to describe encap/decap
forwarding actions that were subsequently applied.  We will need to
define new structures on sFlow.org to describe QinQ encap/decap
actions,  similar to http://sflow.org/sflow_tunnels.txt.  Please start
a thread at sFlow.org to get that rolling.  Once we have those new
sFlow structures defined your patch will probably look mostly the
same.

So what was extended_vlantunnel really for?  To clarify, let's take a
specific example: the test packet in your patch.  It shows a standard
QinQ header using Ethernet types 0x88A8 and 0x8100.  If you put that
into Wireshark it will have no trouble decoding it correctly.  There
is nothing we can add to make it clearer.  So the sFlow collector
should decode it and extract the tags (I know of several that will).
No need for any further annotations.  However, if those ethernet types
were completely arbitrary and non-standard (so that Wireshark would be
defeated) then you would doctor the sampled packet header, remove the
two 802.1Q headers and add 8 to the "stripped" field.  Only then would
you report the tags in an extended_vlantunnel annotation.

I don't think OVS is likely to encounter non-standard ethernet types
in that way,  or hardware that strips vlan tags from the packet before
software even sees it.  So extended_vlantunnel can perhaps be regarded
as an anachronism that dates back to wild-west days?



On Thu, Apr 20, 2017 at 5:39 AM Rzasik, LukaszX
<lukaszx.rzasik@intel.com> wrote:
>
> Hi Neil,
>
> Thanks for the comments. I'll try to explain what is my purpose and why I implemented it that way.
>
> I'm aiming at implementing QinQ related sFlow counters and it seemed that extended_vlantunnel is the right place.
> The only other place that I can find where VLAN tags can be reported is extended_switch but there is place only for a single src_vlan and dst_vlan.
>
> From the description of extended_vlantunnel it fits QinQ scenario ideally. I tried to keep my implementation as close as possible with the description. It does not report all VLAN tags. It reports only stripped VLAN tags when there are multiple tags in the packet.
>
> Also TODO that you left in the code made me confident that it is the right approach:
> /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
>   * construct a VLAN-stack. The sFlow user-action cookie already
>   * captures the egress VLAN ID so there is nothing more to do here.
>   */
>
> After your comment I looked for some more information about using extended_vlantunnel for reporting non-standard 802.1Q headers but I cannot find any information about that in the sFlow spec.
>
> As you mentioned, the spec states only these three conditions:
>      1. The packet has nested vlan tags, AND
>      2. The reporting device is VLAN aware, AND
>      3. One or more VLAN tags have been stripped, either
>         because they represent proprietary encapsulations, or
>         because switch hardware automatically strips the outer VLAN
>         encapsulation.
>
> And I think they are satisfied in the implementation.
>
> Could you explain in more details why you think this is not the right place to report stripped VLAN tags in QinQ scenario?
>
> The implementation does not consider 0x9100 ethernet type as I could not find it anywhere used in OVS code.
> I could extend the implementation to take it into account as the alternative for 0x88a8.
>
> Thanks!
>
> -----Original Message-----
> From: Neil McKee [mailto:neil.mckee@inmon.com]
> Sent: Tuesday, April 18, 2017 7:17 PM
> To: Eric Garver <e@erig.me>; Rzasik, LukaszX <lukaszx.rzasik@intel.com>; dev@openvswitch.org; Thomas F Herbert <thomasfherbert@gmail.com>
> Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow counters
>
> This patch seems like it may be reporting a VLAN stack that can already be decoded from the (ingress-sampled) packet header?
>
> The TODO I left in the code may have been misleading...  this structure is intended for the case where the sampled packet-header includes non-standard 802.1Q headers -- which then have to be stripped out of the sampled header to make it decodable,  or where the 802.1Q headers have already been stripped out of the packet before you sampled it (e.g. by hardware).
>
>  Here is the wording from the sFlow spec at
> http://sflow.org/sflow_version_5.txt:
>
> /* Extended VLAN tunnel information
>    Record outer VLAN encapsulations that have
>    been stripped. extended_vlantunnel information
>    should only be reported if all the following conditions are satisfied:
>      1. The packet has nested vlan tags, AND
>      2. The reporting device is VLAN aware, AND
>      3. One or more VLAN tags have been stripped, either
>         because they represent proprietary encapsulations, or
>         because switch hardware automatically strips the outer VLAN
>         encapsulation.
>    Reporting extended_vlantunnel information is not a substitute for
>    reporting extended_switch information. extended_switch data must
>    always be reported to describe the ingress/egress VLAN information
>    for the packet. The extended_vlantunnel information only applies to
>    nested VLAN tags, and then only when one or more tags has been
>    stripped. */
> /* opaque = flow_data; enterprise = 0; format = 1012 */ extended_vlantunnel {
>   unsigned int stack<>;  /* List of stripped 802.1Q TPID/TCI layers. Each
>                             TPID,TCI pair is represented as a single 32 bit
>                             integer. Layers listed from outermost to
>                             innermost. */ }
>
> My impression is that non-standard 802.1Q headers are now rare (particularly where OVS is concerned).  i.e. everyone uses the
> standard ethernet types 0x8100, 0x9100 etc.   So it's unlikely that
> the OVS sFlow agent will need to doctor the sampled header to remove non-standard 802.1Q headers.
>
> [Quite separately,  if the OVS flow actions included instructions to
> *push* vlans onto the VLAN stack before forwarding the packet,  then it would make sense to report that in the sFlow export just as we do when packets are pushed onto other kinds of tunnel (MPLS, VXLAN etc.).
>   Unfortunately there doesn't seem to be an sFlow structure that represents this today (e.g. in http://sflow.org/sflow_tunnels.txt)]
> ------
> Neil McKee
> InMon Corp.
> http://www.inmon.com
>
>
> On Tue, Apr 18, 2017 at 8:24 AM, Eric Garver <e@erig.me> wrote:
> > Hi Lukasz,
> >
> > Thanks for expanding the 802.1ad support.
> > I'm not familiar with sFlow, so I'll provide what feedback I can.
> >
> > On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
> >> This patch implements QinQ related sFlow counters.
> >> It is implemented according to sFlow Version 5,
> >> http://www.sflow.org/sflow_version_5.txt
> >> Open vSwitch will send a stack of stripped VLAN tags to sFlow
> >> collector if the original packets have multiple VLAN tags.
> >>
> >> Unit tests have been updated accordingly.
> >>
> >> The patch is based on commit f0fb825a37 (Add support for 802.1ad
> >> (QinQ tunneling))
> >>
> >> Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
> >> CC: Thomas F Herbert <thomasfherbert@gmail.com>
> >> CC: Xiao Liang <shaw.leon@gmail.com>
> >> CC: Eric Garver <e@erig.me>
> >> CC: Neil McKee <neil.mckee@inmon.com>
> >> ---
> >>  lib/packets.h                |  4 +++
> >>  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++---
> >> ofproto/ofproto-dpif-sflow.h |  9 +++++
> >>  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
> >>  tests/test-sflow.c           | 25 +++++++++++++
> >>  5 files changed, 177 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/lib/packets.h b/lib/packets.h index 755f08d..7555dfc
> >> 100644
> >> --- a/lib/packets.h
> >> +++ b/lib/packets.h
> >> @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
> >>          eth_type == htons(ETH_TYPE_VLAN_8021AD);  }
> >>
> >> +static inline bool eth_type_8021Q(ovs_be16 eth_type) {
> >> +    return eth_type == htons(ETH_TYPE_VLAN_8021Q); }
> >>
> >>  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
> >>   * lengths. */
> >> diff --git a/ofproto/ofproto-dpif-sflow.c
> >> b/ofproto/ofproto-dpif-sflow.c index 59fafa1..41972e2 100644
> >> --- a/ofproto/ofproto-dpif-sflow.c
> >> +++ b/ofproto/ofproto-dpif-sflow.c
> >> @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
> >>      }
> >>  }
> >>
> >> +static void
> >> +dpif_sflow_pop_vlan(const struct flow *flow,
> >> +                    struct dpif_sflow_actions *sflow_actions) {
> >> +    union flow_vlan_hdr vlan = flow->vlans[0];
> >> +    if (eth_type_vlan(vlan.tpid)) {
> >> +        int depth = 0;
> >> +        int ii;
> >> +        /* Calculate depth by detecting 8021Q TPID. */
> >> +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
> >> +            vlan = flow->vlans[ii];
> >> +            depth++;
> >> +            if (eth_type_8021Q(vlan.tpid)) {
> >> +                break;
> >> +            }
> >
> > This seems to be using 0x8100 as a marker to stop. Does this mean
> > double stacked 0x8100 will not work?
> >
> > flow_count_vlan_headers() can be used to find the number of tags.
> >
> >> +        }
> >> +
> >> +        if (depth > 1) {
> >> +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
> >> +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
> >> +            sflow_actions->vlan_stack_depth++;
> >> +        }
> >
> > AFAICS, this only adds the outermost tag to the stack. Is that
> > intentional? It's unclear to me if sFlow expects one or two VLANs to
> > be in the stack.
> >
> >> +    }
> >> +}
> >> +
> >>  void
> >>  dpif_sflow_read_actions(const struct flow *flow,
> >>                       const struct nlattr *actions, size_t
> >> actions_len, @@ -1170,11 +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
> >>           break;
> >>
> >>       case OVS_ACTION_ATTR_PUSH_VLAN:
> >> +            break;
> >> +
> >>       case OVS_ACTION_ATTR_POP_VLAN:
> >> -         /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
> >> -          * construct a VLAN-stack. The sFlow user-action cookie already
> >> -          * captures the egress VLAN ID so there is nothing more to do here.
> >> -          */
> >> +            dpif_sflow_pop_vlan(flow, sflow_actions);
> >>           break;
> >>
> >>       case OVS_ACTION_ATTR_PUSH_MPLS: { @@ -1225,6 +1249,19 @@
> >> dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
> >>      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;  }
> >>
> >> +static void
> >> +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
> >> +                             uint32_t *vlans_buf,
> >> +                             const struct dpif_sflow_actions
> >> +*sflow_actions) {
> >> +    int ii;
> >> +    stack->depth = sflow_actions->vlan_stack_depth;
> >> +    stack->stack = vlans_buf;
> >> +    for (ii = 0; ii < stack->depth; ii++) {
> >> +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
> >> +    }
> >> +}
> >> +
> >>  /* Extract the output port count from the user action cookie.
> >>   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
> >>   */
> >> @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> >>      SFLFlow_sample_element vniInElem, vniOutElem;
> >>      SFLFlow_sample_element mplsElem;
> >>      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
> >> +    SFLFlow_sample_element vlanTunnelElem;
> >> +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
> >>      SFLSampler *sampler;
> >>      struct dpif_sflow_port *in_dsp;
> >>      struct dpif_sflow_port *out_dsp; @@ -1372,6 +1411,19 @@
> >> dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> >>       SFLADD_ELEMENT(&fs, &mplsElem);
> >>      }
> >>
> >> +    /* stripped VLAN tags stack. */
> >> +    if (sflow_actions
> >> +        && sflow_actions->vlan_stack_depth > 0
> >> +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
> >> +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
> >> +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
> >> +        dpif_sflow_encode_vlan_stack(
> >> +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
> >> +                                 vlans_buf,
> >> +                                 sflow_actions);
> >> +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
> >> +    }
> >> +
> >>      /* Submit the flow sample to be encoded into the next datagram. */
> >>      SFLADD_ELEMENT(&fs, &hdrElem);
> >>      SFLADD_ELEMENT(&fs, &switchElem); diff --git
> >> a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h index
> >> 014e6cc..cfd5492 100644
> >> --- a/ofproto/ofproto-dpif-sflow.h
> >> +++ b/ofproto/ofproto-dpif-sflow.h
> >> @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
> >>      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
> >>      uint32_t mpls_stack_depth;               /* Out stack depth. */
> >>      bool mpls_err;                           /* MPLS actions parse failure. */
> >> +
> >> +    /* Using host-byte order for the vlan stack here
> >> +       to match the expectations of the sFlow library.
> >> +       List of stripped 802.1Q TPID/TCI layers. Each
> >> +       TPID,TCI pair is represented as a single 32 bit
> >> +       integer. Layers listed from outermost to innermost.
> >> +    */
> >> +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
> >> +    uint32_t vlan_stack_depth;                 /* Stack depth. */
> >>  };
> >>
> >>  struct dpif_sflow *dpif_sflow_create(void); diff --git
> >> a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index
> >> 0c2ea38..f096175 100644
> >> --- a/tests/ofproto-dpif.at
> >> +++ b/tests/ofproto-dpif.at
> >> @@ -6333,6 +6333,89 @@ HEADER
> >>  OVS_VSWITCHD_STOP
> >>  AT_CLEANUP
> >>
> >> +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN
> >> +tunnel]) AT_XFAIL_IF([test "$IS_WIN32" = "yes"]) OVS_VSWITCHD_START
> >> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
> >> +add_of_ports br0 1 2 AT_DATA([flows.txt], [dnl
> >> +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
> >> +])
> >> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> >> +
> >> +dnl set up sFlow logging
> >> +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir
> >> +--pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
> >> +AT_CAPTURE_FILE([sflow.log])
> >> +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT]) ovs-appctl
> >> +time/stop
> >> +
> >> +dnl configure sflow
> >> +ovs-vsctl \
> >> +   set Bridge br0 sflow=@sf -- \
> >> +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
> >> +     header=128 sampling=1 polling=0
> >> +
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1
> >> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_typ
> >> +e(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=
> >> +2000,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1
> >> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_typ
> >> +e(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
> >> +
> >> +dnl sleep long enough to get the sFlow datagram flushed out (may be
> >> +delayed for up to 1 second) for i in `seq 1 30`; do
> >> +    ovs-appctl time/warp 100
> >> +done
> >> +
> >> +OVS_APP_EXIT_AND_WAIT([test-sflow])
> >> +
> >> +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
> >> +        /g']], [0], [dnl
> >> +HEADER
> >> +        dgramSeqNo=1
> >> +        ds=127.0.0.1>2:1000
> >> +        fsSeqNo=1
> >> +        ext_vlan_tpid_0=0x88a8
> >> +        ext_vlan_vid_0=150
> >> +        ext_vlan_pcp_0=0
> >> +        ext_vlan_cfi_0=1
> >> +        in_vlan=150
> >> +        in_priority=0
> >> +        out_vlan=0
> >> +        out_priority=0
> >> +        meanSkip=1
> >> +        samplePool=1
> >> +        dropEvents=0
> >> +        in_ifindex=0
> >> +        in_format=0
> >> +        out_ifindex=1
> >> +        out_format=2
> >> +        hdr_prot=1
> >> +        pkt_len=22
> >> +        stripped=4
> >> +        hdr_len=18
> >> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
> >> +HEADER
> >> +        dgramSeqNo=1
> >> +        ds=127.0.0.1>2:1000
> >> +        fsSeqNo=2
> >> +        in_vlan=2000
> >> +        in_priority=0
> >> +        out_vlan=0
> >> +        out_priority=0
> >> +        meanSkip=1
> >> +        samplePool=2
> >> +        dropEvents=0
> >> +        in_ifindex=0
> >> +        in_format=0
> >> +        out_ifindex=1
> >> +        out_format=2
> >> +        hdr_prot=1
> >> +        pkt_len=42
> >> +        stripped=4
> >> +        hdr_len=38
> >> +
> >> +hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-1
> >> +4-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
> >> +])
> >> +
> >> +OVS_VSWITCHD_STOP
> >> +AT_CLEANUP
> >> +
> >>
> >>  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
> >>  #
> >> diff --git a/tests/test-sflow.c b/tests/test-sflow.c index
> >> 6125d38..7d58ef3 100644
> >> --- a/tests/test-sflow.c
> >> +++ b/tests/test-sflow.c
> >> @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;  #define
> >> SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029  #define
> >> SFLOW_TAG_PKT_TUNNEL_VNI_IN 1030  #define SFLOW_TAG_PKT_MPLS 1006
> >> +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
> >>
> >>  /* string sizes */
> >>  #define SFL_MAX_PORTNAME_LEN 255
> >> @@ -116,6 +117,7 @@ struct sflow_xdr {
> >>       uint32_t TUNNEL_VNI_OUT;
> >>       uint32_t TUNNEL_VNI_IN;
> >>       uint32_t MPLS;
> >> +        uint32_t EXT_VLAN;
> >>       uint32_t IFCOUNTERS;
> >>       uint32_t ETHCOUNTERS;
> >>       uint32_t LACPCOUNTERS;
> >> @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
> >>              }
> >>          }
> >>
> >> +        if (x->offset.EXT_VLAN) {
> >> +            uint32_t stack_depth, ii;
> >> +            union flow_vlan_hdr vlan;
> >> +            sflowxdr_setc(x, x->offset.EXT_VLAN);
> >> +            /* print stripped VLAN tags stack */
> >> +            stack_depth = sflowxdr_next(x);
> >> +            for (ii = 0; ii < stack_depth; ii++) {
> >> +                vlan.qtag=sflowxdr_next_n(x);
> >> +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
> >> +                       ii, ntohs(vlan.tpid));
> >> +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
> >> +                       ii, vlan_tci_to_vid(vlan.tci));
> >> +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
> >> +                       ii, vlan_tci_to_pcp(vlan.tci));
> >> +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
> >> +                       ii, vlan_tci_to_cfi(vlan.tci));
> >> +            }
> >> +        }
> >> +
> >>          if (x->offset.SWITCH) {
> >>              sflowxdr_setc(x, x->offset.SWITCH);
> >>              printf(" in_vlan=%"PRIu32, sflowxdr_next(x)); @@ -634,6
> >> +655,10 @@ process_datagram(struct sflow_xdr *x)
> >>                      sflowxdr_mark_unique(x, &x->offset.MPLS);
> >>                      break;
> >>
> >> +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
> >> +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
> >> +                    break;
> >> +
> >>                      /* Add others here... */
> >>                  }
> >>
> >> --
> >> 1.9.3
> >>
> >> --------------------------------------------------------------
> >> Intel Research and Development Ireland Limited Registered in Ireland
> >> Registered Office: Collinstown Industrial Park, Leixlip, County
> >> Kildare Registered Number: 308263
> >>
> >>
> >> This e-mail and any attachments may contain confidential material for
> >> the sole use of the intended recipient(s). Any review or distribution
> >> by others is strictly prohibited. If you are not the intended
> >> recipient, please contact the sender and delete all copies.
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited
> Registered in Ireland
> Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
> Registered Number: 308263
>
>
> This e-mail and any attachments may contain confidential material for the sole
> use of the intended recipient(s). Any review or distribution by others is
> strictly prohibited. If you are not the intended recipient, please contact the
> sender and delete all copies.
Lukasz Rzasik April 21, 2017, 2:12 p.m. UTC | #7
Hi Neil,

Thanks for the clarification. I now understand that the extended_vlantunnel structure should not be used with QinQ scenario.
Most probably I will start the discussion at sflow.org as you suggested.

I will keep you informed about that.

-----Original Message-----
From: Neil McKee [mailto:neil.mckee@inmon.com] 
Sent: Thursday, April 20, 2017 7:50 PM
To: Eric Garver <e@erig.me>; Rzasik, LukaszX <lukaszx.rzasik@intel.com>; Thomas F Herbert <thomasfherbert@gmail.com>; dev@openvswitch.org
Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow counters

The purpose of the extended_vlantunnel structure was to help describe the packet at the point of sampling, not to describe encap/decap forwarding actions that were subsequently applied.  We will need to define new structures on sFlow.org to describe QinQ encap/decap actions,  similar to http://sflow.org/sflow_tunnels.txt.  Please start a thread at sFlow.org to get that rolling.  Once we have those new sFlow structures defined your patch will probably look mostly the same.

So what was extended_vlantunnel really for?  To clarify, let's take a specific example: the test packet in your patch.  It shows a standard QinQ header using Ethernet types 0x88A8 and 0x8100.  If you put that into Wireshark it will have no trouble decoding it correctly.  There is nothing we can add to make it clearer.  So the sFlow collector should decode it and extract the tags (I know of several that will).
No need for any further annotations.  However, if those ethernet types were completely arbitrary and non-standard (so that Wireshark would be
defeated) then you would doctor the sampled packet header, remove the two 802.1Q headers and add 8 to the "stripped" field.  Only then would you report the tags in an extended_vlantunnel annotation.

I don't think OVS is likely to encounter non-standard ethernet types in that way,  or hardware that strips vlan tags from the packet before software even sees it.  So extended_vlantunnel can perhaps be regarded as an anachronism that dates back to wild-west days?



On Thu, Apr 20, 2017 at 5:39 AM Rzasik, LukaszX <lukaszx.rzasik@intel.com> wrote:
>
> Hi Neil,
>
> Thanks for the comments. I'll try to explain what is my purpose and why I implemented it that way.
>
> I'm aiming at implementing QinQ related sFlow counters and it seemed that extended_vlantunnel is the right place.
> The only other place that I can find where VLAN tags can be reported is extended_switch but there is place only for a single src_vlan and dst_vlan.
>
> From the description of extended_vlantunnel it fits QinQ scenario ideally. I tried to keep my implementation as close as possible with the description. It does not report all VLAN tags. It reports only stripped VLAN tags when there are multiple tags in the packet.
>
> Also TODO that you left in the code made me confident that it is the right approach:
> /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
>   * construct a VLAN-stack. The sFlow user-action cookie already
>   * captures the egress VLAN ID so there is nothing more to do here.
>   */
>
> After your comment I looked for some more information about using extended_vlantunnel for reporting non-standard 802.1Q headers but I cannot find any information about that in the sFlow spec.
>
> As you mentioned, the spec states only these three conditions:
>      1. The packet has nested vlan tags, AND
>      2. The reporting device is VLAN aware, AND
>      3. One or more VLAN tags have been stripped, either
>         because they represent proprietary encapsulations, or
>         because switch hardware automatically strips the outer VLAN
>         encapsulation.
>
> And I think they are satisfied in the implementation.
>
> Could you explain in more details why you think this is not the right place to report stripped VLAN tags in QinQ scenario?
>
> The implementation does not consider 0x9100 ethernet type as I could not find it anywhere used in OVS code.
> I could extend the implementation to take it into account as the alternative for 0x88a8.
>
> Thanks!
>
> -----Original Message-----
> From: Neil McKee [mailto:neil.mckee@inmon.com]
> Sent: Tuesday, April 18, 2017 7:17 PM
> To: Eric Garver <e@erig.me>; Rzasik, LukaszX 
> <lukaszx.rzasik@intel.com>; dev@openvswitch.org; Thomas F Herbert 
> <thomasfherbert@gmail.com>
> Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow 
> counters
>
> This patch seems like it may be reporting a VLAN stack that can already be decoded from the (ingress-sampled) packet header?
>
> The TODO I left in the code may have been misleading...  this structure is intended for the case where the sampled packet-header includes non-standard 802.1Q headers -- which then have to be stripped out of the sampled header to make it decodable,  or where the 802.1Q headers have already been stripped out of the packet before you sampled it (e.g. by hardware).
>
>  Here is the wording from the sFlow spec at
> http://sflow.org/sflow_version_5.txt:
>
> /* Extended VLAN tunnel information
>    Record outer VLAN encapsulations that have
>    been stripped. extended_vlantunnel information
>    should only be reported if all the following conditions are satisfied:
>      1. The packet has nested vlan tags, AND
>      2. The reporting device is VLAN aware, AND
>      3. One or more VLAN tags have been stripped, either
>         because they represent proprietary encapsulations, or
>         because switch hardware automatically strips the outer VLAN
>         encapsulation.
>    Reporting extended_vlantunnel information is not a substitute for
>    reporting extended_switch information. extended_switch data must
>    always be reported to describe the ingress/egress VLAN information
>    for the packet. The extended_vlantunnel information only applies to
>    nested VLAN tags, and then only when one or more tags has been
>    stripped. */
> /* opaque = flow_data; enterprise = 0; format = 1012 */ extended_vlantunnel {
>   unsigned int stack<>;  /* List of stripped 802.1Q TPID/TCI layers. Each
>                             TPID,TCI pair is represented as a single 32 bit
>                             integer. Layers listed from outermost to
>                             innermost. */ }
>
> My impression is that non-standard 802.1Q headers are now rare (particularly where OVS is concerned).  i.e. everyone uses the
> standard ethernet types 0x8100, 0x9100 etc.   So it's unlikely that
> the OVS sFlow agent will need to doctor the sampled header to remove non-standard 802.1Q headers.
>
> [Quite separately,  if the OVS flow actions included instructions to
> *push* vlans onto the VLAN stack before forwarding the packet,  then it would make sense to report that in the sFlow export just as we do when packets are pushed onto other kinds of tunnel (MPLS, VXLAN etc.).
>   Unfortunately there doesn't seem to be an sFlow structure that 
> represents this today (e.g. in http://sflow.org/sflow_tunnels.txt)]
> ------
> Neil McKee
> InMon Corp.
> http://www.inmon.com
>
>
> On Tue, Apr 18, 2017 at 8:24 AM, Eric Garver <e@erig.me> wrote:
> > Hi Lukasz,
> >
> > Thanks for expanding the 802.1ad support.
> > I'm not familiar with sFlow, so I'll provide what feedback I can.
> >
> > On Tue, Apr 18, 2017 at 12:08:22PM +0100, Lukasz Rzasik wrote:
> >> This patch implements QinQ related sFlow counters.
> >> It is implemented according to sFlow Version 5, 
> >> http://www.sflow.org/sflow_version_5.txt
> >> Open vSwitch will send a stack of stripped VLAN tags to sFlow 
> >> collector if the original packets have multiple VLAN tags.
> >>
> >> Unit tests have been updated accordingly.
> >>
> >> The patch is based on commit f0fb825a37 (Add support for 802.1ad 
> >> (QinQ tunneling))
> >>
> >> Signed-off-by: Lukasz Rzasik <lukaszx.rzasik@intel.com>
> >> CC: Thomas F Herbert <thomasfherbert@gmail.com>
> >> CC: Xiao Liang <shaw.leon@gmail.com>
> >> CC: Eric Garver <e@erig.me>
> >> CC: Neil McKee <neil.mckee@inmon.com>
> >> ---
> >>  lib/packets.h                |  4 +++
> >>  ofproto/ofproto-dpif-sflow.c | 60 +++++++++++++++++++++++++++++--- 
> >> ofproto/ofproto-dpif-sflow.h |  9 +++++
> >>  tests/ofproto-dpif.at        | 83 ++++++++++++++++++++++++++++++++++++++++++++
> >>  tests/test-sflow.c           | 25 +++++++++++++
> >>  5 files changed, 177 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/lib/packets.h b/lib/packets.h index 755f08d..7555dfc
> >> 100644
> >> --- a/lib/packets.h
> >> +++ b/lib/packets.h
> >> @@ -384,6 +384,10 @@ static inline bool eth_type_vlan(ovs_be16 eth_type)
> >>          eth_type == htons(ETH_TYPE_VLAN_8021AD);  }
> >>
> >> +static inline bool eth_type_8021Q(ovs_be16 eth_type) {
> >> +    return eth_type == htons(ETH_TYPE_VLAN_8021Q); }
> >>
> >>  /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
> >>   * lengths. */
> >> diff --git a/ofproto/ofproto-dpif-sflow.c 
> >> b/ofproto/ofproto-dpif-sflow.c index 59fafa1..41972e2 100644
> >> --- a/ofproto/ofproto-dpif-sflow.c
> >> +++ b/ofproto/ofproto-dpif-sflow.c
> >> @@ -1084,6 +1084,31 @@ dpif_sflow_capture_input_mpls(const struct flow *flow,
> >>      }
> >>  }
> >>
> >> +static void
> >> +dpif_sflow_pop_vlan(const struct flow *flow,
> >> +                    struct dpif_sflow_actions *sflow_actions) {
> >> +    union flow_vlan_hdr vlan = flow->vlans[0];
> >> +    if (eth_type_vlan(vlan.tpid)) {
> >> +        int depth = 0;
> >> +        int ii;
> >> +        /* Calculate depth by detecting 8021Q TPID. */
> >> +        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
> >> +            vlan = flow->vlans[ii];
> >> +            depth++;
> >> +            if (eth_type_8021Q(vlan.tpid)) {
> >> +                break;
> >> +            }
> >
> > This seems to be using 0x8100 as a marker to stop. Does this mean 
> > double stacked 0x8100 will not work?
> >
> > flow_count_vlan_headers() can be used to find the number of tags.
> >
> >> +        }
> >> +
> >> +        if (depth > 1) {
> >> +            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
> >> +            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
> >> +            sflow_actions->vlan_stack_depth++;
> >> +        }
> >
> > AFAICS, this only adds the outermost tag to the stack. Is that 
> > intentional? It's unclear to me if sFlow expects one or two VLANs to 
> > be in the stack.
> >
> >> +    }
> >> +}
> >> +
> >>  void
> >>  dpif_sflow_read_actions(const struct flow *flow,
> >>                       const struct nlattr *actions, size_t 
> >> actions_len, @@ -1170,11 +1195,10 @@ dpif_sflow_read_actions(const struct flow *flow,
> >>           break;
> >>
> >>       case OVS_ACTION_ATTR_PUSH_VLAN:
> >> +            break;
> >> +
> >>       case OVS_ACTION_ATTR_POP_VLAN:
> >> -         /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
> >> -          * construct a VLAN-stack. The sFlow user-action cookie already
> >> -          * captures the egress VLAN ID so there is nothing more to do here.
> >> -          */
> >> +            dpif_sflow_pop_vlan(flow, sflow_actions);
> >>           break;
> >>
> >>       case OVS_ACTION_ATTR_PUSH_MPLS: { @@ -1225,6 +1249,19 @@ 
> >> dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
> >>      stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;  }
> >>
> >> +static void
> >> +dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
> >> +                             uint32_t *vlans_buf,
> >> +                             const struct dpif_sflow_actions
> >> +*sflow_actions) {
> >> +    int ii;
> >> +    stack->depth = sflow_actions->vlan_stack_depth;
> >> +    stack->stack = vlans_buf;
> >> +    for (ii = 0; ii < stack->depth; ii++) {
> >> +        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
> >> +    }
> >> +}
> >> +
> >>  /* Extract the output port count from the user action cookie.
> >>   * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
> >>   */
> >> @@ -1258,6 +1295,8 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> >>      SFLFlow_sample_element vniInElem, vniOutElem;
> >>      SFLFlow_sample_element mplsElem;
> >>      uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
> >> +    SFLFlow_sample_element vlanTunnelElem;
> >> +    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
> >>      SFLSampler *sampler;
> >>      struct dpif_sflow_port *in_dsp;
> >>      struct dpif_sflow_port *out_dsp; @@ -1372,6 +1411,19 @@ 
> >> dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
> >>       SFLADD_ELEMENT(&fs, &mplsElem);
> >>      }
> >>
> >> +    /* stripped VLAN tags stack. */
> >> +    if (sflow_actions
> >> +        && sflow_actions->vlan_stack_depth > 0
> >> +        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
> >> +        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
> >> +        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
> >> +        dpif_sflow_encode_vlan_stack(
> >> +                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
> >> +                                 vlans_buf,
> >> +                                 sflow_actions);
> >> +        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
> >> +    }
> >> +
> >>      /* Submit the flow sample to be encoded into the next datagram. */
> >>      SFLADD_ELEMENT(&fs, &hdrElem);
> >>      SFLADD_ELEMENT(&fs, &switchElem); diff --git 
> >> a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h index
> >> 014e6cc..cfd5492 100644
> >> --- a/ofproto/ofproto-dpif-sflow.h
> >> +++ b/ofproto/ofproto-dpif-sflow.h
> >> @@ -49,6 +49,15 @@ struct dpif_sflow_actions {
> >>      uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
> >>      uint32_t mpls_stack_depth;               /* Out stack depth. */
> >>      bool mpls_err;                           /* MPLS actions parse failure. */
> >> +
> >> +    /* Using host-byte order for the vlan stack here
> >> +       to match the expectations of the sFlow library.
> >> +       List of stripped 802.1Q TPID/TCI layers. Each
> >> +       TPID,TCI pair is represented as a single 32 bit
> >> +       integer. Layers listed from outermost to innermost.
> >> +    */
> >> +    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
> >> +    uint32_t vlan_stack_depth;                 /* Stack depth. */
> >>  };
> >>
> >>  struct dpif_sflow *dpif_sflow_create(void); diff --git 
> >> a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index
> >> 0c2ea38..f096175 100644
> >> --- a/tests/ofproto-dpif.at
> >> +++ b/tests/ofproto-dpif.at
> >> @@ -6333,6 +6333,89 @@ HEADER
> >>  OVS_VSWITCHD_STOP
> >>  AT_CLEANUP
> >>
> >> +AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN
> >> +tunnel]) AT_XFAIL_IF([test "$IS_WIN32" = "yes"]) 
> >> +OVS_VSWITCHD_START AT_CHECK([ovs-appctl vlog/set dpif:dbg 
> >> +dpif_netdev:dbg]) add_of_ports br0 1 2 AT_DATA([flows.txt], [dnl
> >> +table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
> >> +])
> >> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> >> +
> >> +dnl set up sFlow logging
> >> +AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir 
> >> +--pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
> >> +AT_CAPTURE_FILE([sflow.log])
> >> +PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT]) ovs-appctl 
> >> +time/stop
> >> +
> >> +dnl configure sflow
> >> +ovs-vsctl \
> >> +   set Bridge br0 sflow=@sf -- \
> >> +   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
> >> +     header=128 sampling=1 polling=0
> >> +
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
> >> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_t
> >> +yp 
> >> +e(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vi
> >> +d=
> >> +2000,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
> >> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
> >> +'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_t
> >> +yp
> >> +e(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
> >> +
> >> +dnl sleep long enough to get the sFlow datagram flushed out (may 
> >> +be delayed for up to 1 second) for i in `seq 1 30`; do
> >> +    ovs-appctl time/warp 100
> >> +done
> >> +
> >> +OVS_APP_EXIT_AND_WAIT([test-sflow])
> >> +
> >> +AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
> >> +        /g']], [0], [dnl
> >> +HEADER
> >> +        dgramSeqNo=1
> >> +        ds=127.0.0.1>2:1000
> >> +        fsSeqNo=1
> >> +        ext_vlan_tpid_0=0x88a8
> >> +        ext_vlan_vid_0=150
> >> +        ext_vlan_pcp_0=0
> >> +        ext_vlan_cfi_0=1
> >> +        in_vlan=150
> >> +        in_priority=0
> >> +        out_vlan=0
> >> +        out_priority=0
> >> +        meanSkip=1
> >> +        samplePool=1
> >> +        dropEvents=0
> >> +        in_ifindex=0
> >> +        in_format=0
> >> +        out_ifindex=1
> >> +        out_format=2
> >> +        hdr_prot=1
> >> +        pkt_len=22
> >> +        stripped=4
> >> +        hdr_len=18
> >> +        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
> >> +HEADER
> >> +        dgramSeqNo=1
> >> +        ds=127.0.0.1>2:1000
> >> +        fsSeqNo=2
> >> +        in_vlan=2000
> >> +        in_priority=0
> >> +        out_vlan=0
> >> +        out_priority=0
> >> +        meanSkip=1
> >> +        samplePool=2
> >> +        dropEvents=0
> >> +        in_ifindex=0
> >> +        in_format=0
> >> +        out_ifindex=1
> >> +        out_format=2
> >> +        hdr_prot=1
> >> +        pkt_len=42
> >> +        stripped=4
> >> +        hdr_len=38
> >> +
> >> +hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00
> >> +-1
> >> +4-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
> >> +])
> >> +
> >> +OVS_VSWITCHD_STOP
> >> +AT_CLEANUP
> >> +
> >>
> >>  # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
> >>  #
> >> diff --git a/tests/test-sflow.c b/tests/test-sflow.c index
> >> 6125d38..7d58ef3 100644
> >> --- a/tests/test-sflow.c
> >> +++ b/tests/test-sflow.c
> >> @@ -65,6 +65,7 @@ static unixctl_cb_func test_sflow_exit;  #define 
> >> SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029  #define 
> >> SFLOW_TAG_PKT_TUNNEL_VNI_IN 1030  #define SFLOW_TAG_PKT_MPLS 1006
> >> +#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
> >>
> >>  /* string sizes */
> >>  #define SFL_MAX_PORTNAME_LEN 255
> >> @@ -116,6 +117,7 @@ struct sflow_xdr {
> >>       uint32_t TUNNEL_VNI_OUT;
> >>       uint32_t TUNNEL_VNI_IN;
> >>       uint32_t MPLS;
> >> +        uint32_t EXT_VLAN;
> >>       uint32_t IFCOUNTERS;
> >>       uint32_t ETHCOUNTERS;
> >>       uint32_t LACPCOUNTERS;
> >> @@ -428,6 +430,25 @@ process_flow_sample(struct sflow_xdr *x)
> >>              }
> >>          }
> >>
> >> +        if (x->offset.EXT_VLAN) {
> >> +            uint32_t stack_depth, ii;
> >> +            union flow_vlan_hdr vlan;
> >> +            sflowxdr_setc(x, x->offset.EXT_VLAN);
> >> +            /* print stripped VLAN tags stack */
> >> +            stack_depth = sflowxdr_next(x);
> >> +            for (ii = 0; ii < stack_depth; ii++) {
> >> +                vlan.qtag=sflowxdr_next_n(x);
> >> +                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
> >> +                       ii, ntohs(vlan.tpid));
> >> +                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
> >> +                       ii, vlan_tci_to_vid(vlan.tci));
> >> +                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
> >> +                       ii, vlan_tci_to_pcp(vlan.tci));
> >> +                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
> >> +                       ii, vlan_tci_to_cfi(vlan.tci));
> >> +            }
> >> +        }
> >> +
> >>          if (x->offset.SWITCH) {
> >>              sflowxdr_setc(x, x->offset.SWITCH);
> >>              printf(" in_vlan=%"PRIu32, sflowxdr_next(x)); @@ 
> >> -634,6
> >> +655,10 @@ process_datagram(struct sflow_xdr *x)
> >>                      sflowxdr_mark_unique(x, &x->offset.MPLS);
> >>                      break;
> >>
> >> +                case SFLOW_TAG_PKT_EXTENDED_VLAN:
> >> +                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
> >> +                    break;
> >> +
> >>                      /* Add others here... */
> >>                  }
> >>
> >> --
> >> 1.9.3
> >>
> >> --------------------------------------------------------------
> >> Intel Research and Development Ireland Limited Registered in 
> >> Ireland Registered Office: Collinstown Industrial Park, Leixlip, 
> >> County Kildare Registered Number: 308263
> >>
> >>
> >> This e-mail and any attachments may contain confidential material 
> >> for the sole use of the intended recipient(s). Any review or 
> >> distribution by others is strictly prohibited. If you are not the 
> >> intended recipient, please contact the sender and delete all copies.
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> --------------------------------------------------------------
> Intel Research and Development Ireland Limited Registered in Ireland 
> Registered Office: Collinstown Industrial Park, Leixlip, County 
> Kildare Registered Number: 308263
>
>
> This e-mail and any attachments may contain confidential material for 
> the sole use of the intended recipient(s). Any review or distribution 
> by others is strictly prohibited. If you are not the intended 
> recipient, please contact the sender and delete all copies.
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
Ben Pfaff April 22, 2017, 4:42 a.m. UTC | #8
On Thu, Apr 20, 2017 at 10:50:17AM -0700, Neil McKee wrote:
> I don't think OVS is likely to encounter non-standard ethernet types
> in that way,  or hardware that strips vlan tags from the packet before
> software even sees it.  So extended_vlantunnel can perhaps be regarded
> as an anachronism that dates back to wild-west days?

OVS only supports 0x8100 and 0x88a8 Ethertype types for VLANs (so far),
so I guess that you're right.
Lukasz Rzasik April 28, 2017, 12:33 p.m. UTC | #9
The discussion has been started on sFlow group and can be found here: https://groups.google.com/forum/#!topic/sflow/S22dbzam404

-----Original Message-----
From: Ben Pfaff [mailto:blp@ovn.org] 
Sent: Saturday, April 22, 2017 6:43 AM
To: Neil McKee <neil.mckee@inmon.com>
Cc: Eric Garver <e@erig.me>; Rzasik, LukaszX <lukaszx.rzasik@intel.com>; Thomas F Herbert <thomasfherbert@gmail.com>; dev@openvswitch.org
Subject: Re: [ovs-dev] [PATCH] ofproto-dpif: Send QinQ related sFlow counters

On Thu, Apr 20, 2017 at 10:50:17AM -0700, Neil McKee wrote:
> I don't think OVS is likely to encounter non-standard ethernet types 
> in that way,  or hardware that strips vlan tags from the packet before 
> software even sees it.  So extended_vlantunnel can perhaps be regarded 
> as an anachronism that dates back to wild-west days?

OVS only supports 0x8100 and 0x88a8 Ethertype types for VLANs (so far), so I guess that you're right.
--------------------------------------------------------------
Intel Research and Development Ireland Limited
Registered in Ireland
Registered Office: Collinstown Industrial Park, Leixlip, County Kildare
Registered Number: 308263


This e-mail and any attachments may contain confidential material for the sole
use of the intended recipient(s). Any review or distribution by others is
strictly prohibited. If you are not the intended recipient, please contact the
sender and delete all copies.
diff mbox

Patch

diff --git a/lib/packets.h b/lib/packets.h
index 755f08d..7555dfc 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -384,6 +384,10 @@  static inline bool eth_type_vlan(ovs_be16 eth_type)
         eth_type == htons(ETH_TYPE_VLAN_8021AD);
 }
 
+static inline bool eth_type_8021Q(ovs_be16 eth_type)
+{
+    return eth_type == htons(ETH_TYPE_VLAN_8021Q);
+}
 
 /* Minimum value for an Ethernet type.  Values below this are IEEE 802.2 frame
  * lengths. */
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index 59fafa1..41972e2 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -1084,6 +1084,31 @@  dpif_sflow_capture_input_mpls(const struct flow *flow,
     }
 }
 
+static void
+dpif_sflow_pop_vlan(const struct flow *flow,
+                    struct dpif_sflow_actions *sflow_actions)
+{
+    union flow_vlan_hdr vlan = flow->vlans[0];
+    if (eth_type_vlan(vlan.tpid)) {
+        int depth = 0;
+        int ii;
+        /* Calculate depth by detecting 8021Q TPID. */
+        for (ii = 0; ii < FLOW_MAX_VLAN_HEADERS; ii++) {
+            vlan = flow->vlans[ii];
+            depth++;
+            if (eth_type_8021Q(vlan.tpid)) {
+                break;
+            }
+        }
+
+        if (depth > 1) {
+            sflow_actions->vlan_qtag[sflow_actions->vlan_stack_depth] =
+            ntohl(flow->vlans[sflow_actions->vlan_stack_depth].qtag);
+            sflow_actions->vlan_stack_depth++;
+        }
+    }
+}
+
 void
 dpif_sflow_read_actions(const struct flow *flow,
 			const struct nlattr *actions, size_t actions_len,
@@ -1170,11 +1195,10 @@  dpif_sflow_read_actions(const struct flow *flow,
 	    break;
 
 	case OVS_ACTION_ATTR_PUSH_VLAN:
+            break;
+
 	case OVS_ACTION_ATTR_POP_VLAN:
-	    /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
-	     * construct a VLAN-stack. The sFlow user-action cookie already
-	     * captures the egress VLAN ID so there is nothing more to do here.
-	     */
+            dpif_sflow_pop_vlan(flow, sflow_actions);
 	    break;
 
 	case OVS_ACTION_ATTR_PUSH_MPLS: {
@@ -1225,6 +1249,19 @@  dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
     stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
 }
 
+static void
+dpif_sflow_encode_vlan_stack(SFLVlanStack *stack,
+                             uint32_t *vlans_buf,
+                             const struct dpif_sflow_actions *sflow_actions)
+{
+    int ii;
+    stack->depth = sflow_actions->vlan_stack_depth;
+    stack->stack = vlans_buf;
+    for (ii = 0; ii < stack->depth; ii++) {
+        stack->stack[ii] = sflow_actions->vlan_qtag[ii];
+    }
+}
+
 /* Extract the output port count from the user action cookie.
  * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
  */
@@ -1258,6 +1295,8 @@  dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
     SFLFlow_sample_element vniInElem, vniOutElem;
     SFLFlow_sample_element mplsElem;
     uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
+    SFLFlow_sample_element vlanTunnelElem;
+    uint32_t vlans_buf[FLOW_MAX_VLAN_HEADERS];
     SFLSampler *sampler;
     struct dpif_sflow_port *in_dsp;
     struct dpif_sflow_port *out_dsp;
@@ -1372,6 +1411,19 @@  dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
 	SFLADD_ELEMENT(&fs, &mplsElem);
     }
 
+    /* stripped VLAN tags stack. */
+    if (sflow_actions
+        && sflow_actions->vlan_stack_depth > 0
+        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
+        memset(&vlanTunnelElem, 0, sizeof(vlanTunnelElem));
+        vlanTunnelElem.tag = SFLFLOW_EX_VLAN_TUNNEL;
+        dpif_sflow_encode_vlan_stack(
+                                 &vlanTunnelElem.flowType.vlan_tunnel.stack,
+                                 vlans_buf,
+                                 sflow_actions);
+        SFLADD_ELEMENT(&fs, &vlanTunnelElem);
+    }
+
     /* Submit the flow sample to be encoded into the next datagram. */
     SFLADD_ELEMENT(&fs, &hdrElem);
     SFLADD_ELEMENT(&fs, &switchElem);
diff --git a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h
index 014e6cc..cfd5492 100644
--- a/ofproto/ofproto-dpif-sflow.h
+++ b/ofproto/ofproto-dpif-sflow.h
@@ -49,6 +49,15 @@  struct dpif_sflow_actions {
     uint32_t mpls_lse[FLOW_MAX_MPLS_LABELS]; /* Out stack in host byte order. */
     uint32_t mpls_stack_depth;               /* Out stack depth. */
     bool mpls_err;                           /* MPLS actions parse failure. */
+
+    /* Using host-byte order for the vlan stack here
+       to match the expectations of the sFlow library.
+       List of stripped 802.1Q TPID/TCI layers. Each
+       TPID,TCI pair is represented as a single 32 bit
+       integer. Layers listed from outermost to innermost.
+    */
+    uint32_t vlan_qtag[FLOW_MAX_VLAN_HEADERS]; /* Stack in host byte order. */
+    uint32_t vlan_stack_depth;                 /* Stack depth. */
 };
 
 struct dpif_sflow *dpif_sflow_create(void);
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 0c2ea38..f096175 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -6333,6 +6333,89 @@  HEADER
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif - sFlow packet sampling - Extended VLAN tunnel])
+AT_XFAIL_IF([test "$IS_WIN32" = "yes"])
+OVS_VSWITCHD_START
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+add_of_ports br0 1 2
+AT_DATA([flows.txt], [dnl
+table=0 dl_src=50:54:00:00:00:10 actions=strip_vlan,2
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl set up sFlow logging
+AT_CHECK([ovstest test-sflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore])
+AT_CAPTURE_FILE([sflow.log])
+PARSE_LISTENING_PORT([test-sflow.log], [SFLOW_PORT])
+ovs-appctl time/stop
+
+dnl configure sflow
+ovs-vsctl \
+   set Bridge br0 sflow=@sf -- \
+   --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \
+     header=128 sampling=1 polling=0
+
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type(0x88a8),vlan(vid=150,pcp=0,cfi=1),encap(eth_type(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800)))'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:10,dst=50:54:00:00:00:0a),eth_type(0x8100),vlan(vid=2000,pcp=0,cfi=1),encap(eth_type(0x0800))'])
+
+dnl sleep long enough to get the sFlow datagram flushed out (may be delayed for up to 1 second)
+for i in `seq 1 30`; do
+    ovs-appctl time/warp 100
+done
+
+OVS_APP_EXIT_AND_WAIT([test-sflow])
+
+AT_CHECK_UNQUOTED([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\
+        /g']], [0], [dnl
+HEADER
+        dgramSeqNo=1
+        ds=127.0.0.1>2:1000
+        fsSeqNo=1
+        ext_vlan_tpid_0=0x88a8
+        ext_vlan_vid_0=150
+        ext_vlan_pcp_0=0
+        ext_vlan_cfi_0=1
+        in_vlan=150
+        in_priority=0
+        out_vlan=0
+        out_priority=0
+        meanSkip=1
+        samplePool=1
+        dropEvents=0
+        in_ifindex=0
+        in_format=0
+        out_ifindex=1
+        out_format=2
+        hdr_prot=1
+        pkt_len=22
+        stripped=4
+        hdr_len=18
+        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-88-A8-00-96-81-00
+HEADER
+        dgramSeqNo=1
+        ds=127.0.0.1>2:1000
+        fsSeqNo=2
+        in_vlan=2000
+        in_priority=0
+        out_vlan=0
+        out_priority=0
+        meanSkip=1
+        samplePool=2
+        dropEvents=0
+        in_ifindex=0
+        in_format=0
+        out_ifindex=1
+        out_format=2
+        hdr_prot=1
+        pkt_len=42
+        stripped=4
+        hdr_len=38
+        hdr=50-54-00-00-00-0A-50-54-00-00-00-10-81-00-07-D0-08-00-45-00-00-14-00-00-00-00-00-00-BA-EB-00-00-00-00-00-00-00-00
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 
 # CHECK_NETFLOW_EXPIRATION(LOOPBACK_ADDR)
 #
diff --git a/tests/test-sflow.c b/tests/test-sflow.c
index 6125d38..7d58ef3 100644
--- a/tests/test-sflow.c
+++ b/tests/test-sflow.c
@@ -65,6 +65,7 @@  static unixctl_cb_func test_sflow_exit;
 #define SFLOW_TAG_PKT_TUNNEL_VNI_OUT 1029
 #define SFLOW_TAG_PKT_TUNNEL_VNI_IN 1030
 #define SFLOW_TAG_PKT_MPLS 1006
+#define SFLOW_TAG_PKT_EXTENDED_VLAN 1012
 
 /* string sizes */
 #define SFL_MAX_PORTNAME_LEN 255
@@ -116,6 +117,7 @@  struct sflow_xdr {
 	uint32_t TUNNEL_VNI_OUT;
 	uint32_t TUNNEL_VNI_IN;
 	uint32_t MPLS;
+        uint32_t EXT_VLAN;
 	uint32_t IFCOUNTERS;
 	uint32_t ETHCOUNTERS;
 	uint32_t LACPCOUNTERS;
@@ -428,6 +430,25 @@  process_flow_sample(struct sflow_xdr *x)
             }
         }
 
+        if (x->offset.EXT_VLAN) {
+            uint32_t stack_depth, ii;
+            union flow_vlan_hdr vlan;
+            sflowxdr_setc(x, x->offset.EXT_VLAN);
+            /* print stripped VLAN tags stack */
+            stack_depth = sflowxdr_next(x);
+            for (ii = 0; ii < stack_depth; ii++) {
+                vlan.qtag=sflowxdr_next_n(x);
+                printf(" ext_vlan_tpid_%"PRIu32"=0x%"PRIx32,
+                       ii, ntohs(vlan.tpid));
+                printf(" ext_vlan_vid_%"PRIu32"=%"PRIu32,
+                       ii, vlan_tci_to_vid(vlan.tci));
+                printf(" ext_vlan_pcp_%"PRIu32"=%"PRIu32,
+                       ii, vlan_tci_to_pcp(vlan.tci));
+                printf(" ext_vlan_cfi_%"PRIu32"=%"PRIu32,
+                       ii, vlan_tci_to_cfi(vlan.tci));
+            }
+        }
+
         if (x->offset.SWITCH) {
             sflowxdr_setc(x, x->offset.SWITCH);
             printf(" in_vlan=%"PRIu32, sflowxdr_next(x));
@@ -634,6 +655,10 @@  process_datagram(struct sflow_xdr *x)
                     sflowxdr_mark_unique(x, &x->offset.MPLS);
                     break;
 
+                case SFLOW_TAG_PKT_EXTENDED_VLAN:
+                    sflowxdr_mark_unique(x, &x->offset.EXT_VLAN);
+                    break;
+
                     /* Add others here... */
                 }