diff mbox

[net-next,01/13] openvswitch: split flow structures into ovs specific and generic ones

Message ID 1409736300-12303-2-git-send-email-jiri@resnulli.us
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Pirko Sept. 3, 2014, 9:24 a.m. UTC
After this, flow related structures can be used in other code.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
 include/net/sw_flow.h          |  99 ++++++++++++++++++++++++++++++++++
 net/openvswitch/actions.c      |   3 +-
 net/openvswitch/datapath.c     |  74 +++++++++++++-------------
 net/openvswitch/datapath.h     |   4 +-
 net/openvswitch/flow.c         |   6 +--
 net/openvswitch/flow.h         | 102 +++++++----------------------------
 net/openvswitch/flow_netlink.c |  53 +++++++++---------
 net/openvswitch/flow_netlink.h |  10 ++--
 net/openvswitch/flow_table.c   | 118 ++++++++++++++++++++++-------------------
 net/openvswitch/flow_table.h   |  30 +++++------
 net/openvswitch/vport-gre.c    |   4 +-
 net/openvswitch/vport-vxlan.c  |   2 +-
 net/openvswitch/vport.c        |   2 +-
 net/openvswitch/vport.h        |   2 +-
 14 files changed, 276 insertions(+), 233 deletions(-)
 create mode 100644 include/net/sw_flow.h

Comments

John Fastabend Sept. 3, 2014, 3:20 p.m. UTC | #1
On 09/03/2014 02:24 AM, Jiri Pirko wrote:
> After this, flow related structures can be used in other code.
>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---

Hi Jiri,

As I indicated before I'm looking into integrating this with some
hardware here. Progress is a bit slow but starting to look at it.The
i40e/ixgbe driver being one open source example with very limited
support for tables, flow matches, etc. And then a closed source driver
with much more flexibility. What I don't have is a middle of the road
switch to work with something better then a host nic but not as
flexible as a TOR.

Couple questions my assumption here is I can extend the flow_key
as needed to support additional match criteria my hardware has.
I scanned the ./net/openvswitch source and I didn't catch any
place that would break but might need to take a closer look.
Similarly the actions set will need to be extended. For example
if I want to use this with i40e a OVS_ACTION_ATTR_QUEUE could
be used to steer packets to the queue. With this in mind we
will want a follow up patch to rename OVS_ACTION_ATTR_* to
FLOW_ACTION_ATTR_*

Also I have some filters that can match on offset/length/mask
tuples. As far as I can tell this is going to have to be yet
another interface? Or would it be worth the effort to define
the flow key more generically. My initial guess is I'll just
write a separate interface. I think this is what Jamal referred
to as another "classifier".

Thanks,
John

[...]

> +
> +struct sw_flow_key_ipv4_tunnel {
> +	__be64 tun_id;
> +	__be32 ipv4_src;
> +	__be32 ipv4_dst;
> +	__be16 tun_flags;
> +	u8   ipv4_tos;
> +	u8   ipv4_ttl;
> +};
> +
> +struct sw_flow_key {
> +	struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
> +	struct {
> +		u32	priority;	/* Packet QoS priority. */
> +		u32	skb_mark;	/* SKB mark. */
> +		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
> +	} __packed phy; /* Safe when right after 'tun_key'. */
> +	struct {
> +		u8     src[ETH_ALEN];	/* Ethernet source address. */
> +		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
> +		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
> +		__be16 type;		/* Ethernet frame type. */
> +	} eth;
> +	struct {
> +		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
> +		u8     tos;		/* IP ToS. */
> +		u8     ttl;		/* IP TTL/hop limit. */
> +		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
> +	} ip;
> +	struct {
> +		__be16 src;		/* TCP/UDP/SCTP source port. */
> +		__be16 dst;		/* TCP/UDP/SCTP destination port. */
> +		__be16 flags;		/* TCP flags. */
> +	} tp;
> +	union {
> +		struct {
> +			struct {
> +				__be32 src;	/* IP source address. */
> +				__be32 dst;	/* IP destination address. */
> +			} addr;
> +			struct {
> +				u8 sha[ETH_ALEN];	/* ARP source hardware address. */
> +				u8 tha[ETH_ALEN];	/* ARP target hardware address. */
> +			} arp;
> +		} ipv4;
> +		struct {
> +			struct {
> +				struct in6_addr src;	/* IPv6 source address. */
> +				struct in6_addr dst;	/* IPv6 destination address. */
> +			} addr;
> +			__be32 label;			/* IPv6 flow label. */
> +			struct {
> +				struct in6_addr target;	/* ND target address. */
> +				u8 sll[ETH_ALEN];	/* ND source link layer address. */
> +				u8 tll[ETH_ALEN];	/* ND target link layer address. */
> +			} nd;
> +		} ipv6;
> +	};
> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
> +
> +struct sw_flow_key_range {
> +	unsigned short int start;
> +	unsigned short int end;
> +};
> +
> +struct sw_flow_mask {
> +	struct sw_flow_key_range range;
> +	struct sw_flow_key key;
> +};
> +
> +struct sw_flow_action {
> +};
> +
> +struct sw_flow_actions {
> +	unsigned count;
> +	struct sw_flow_action actions[0];
> +};
> +
> +struct sw_flow {
> +	struct sw_flow_key key;
> +	struct sw_flow_key unmasked_key;
> +	struct sw_flow_mask *mask;
> +	struct sw_flow_actions *actions;
> +};
> +
Pravin B Shelar Sept. 3, 2014, 6:41 p.m. UTC | #2
On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> After this, flow related structures can be used in other code.
>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
> ---
>  include/net/sw_flow.h          |  99 ++++++++++++++++++++++++++++++++++
>  net/openvswitch/actions.c      |   3 +-
>  net/openvswitch/datapath.c     |  74 +++++++++++++-------------
>  net/openvswitch/datapath.h     |   4 +-
>  net/openvswitch/flow.c         |   6 +--
>  net/openvswitch/flow.h         | 102 +++++++----------------------------
>  net/openvswitch/flow_netlink.c |  53 +++++++++---------
>  net/openvswitch/flow_netlink.h |  10 ++--
>  net/openvswitch/flow_table.c   | 118 ++++++++++++++++++++++-------------------
>  net/openvswitch/flow_table.h   |  30 +++++------
>  net/openvswitch/vport-gre.c    |   4 +-
>  net/openvswitch/vport-vxlan.c  |   2 +-
>  net/openvswitch/vport.c        |   2 +-
>  net/openvswitch/vport.h        |   2 +-
>  14 files changed, 276 insertions(+), 233 deletions(-)
>  create mode 100644 include/net/sw_flow.h
>
> diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
> new file mode 100644
> index 0000000..21724f1
> --- /dev/null
> +++ b/include/net/sw_flow.h
> @@ -0,0 +1,99 @@
> +/*
> + * include/net/sw_flow.h - Generic switch flow structures
> + * Copyright (c) 2007-2012 Nicira, Inc.
> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#ifndef _NET_SW_FLOW_H_
> +#define _NET_SW_FLOW_H_
> +
> +struct sw_flow_key_ipv4_tunnel {
> +       __be64 tun_id;
> +       __be32 ipv4_src;
> +       __be32 ipv4_dst;
> +       __be16 tun_flags;
> +       u8   ipv4_tos;
> +       u8   ipv4_ttl;
> +};
> +
> +struct sw_flow_key {
> +       struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
> +       struct {
> +               u32     priority;       /* Packet QoS priority. */
> +               u32     skb_mark;       /* SKB mark. */
> +               u16     in_port;        /* Input switch port (or DP_MAX_PORTS). */
> +       } __packed phy; /* Safe when right after 'tun_key'. */
> +       struct {
> +               u8     src[ETH_ALEN];   /* Ethernet source address. */
> +               u8     dst[ETH_ALEN];   /* Ethernet destination address. */
> +               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
> +               __be16 type;            /* Ethernet frame type. */
> +       } eth;
> +       struct {
> +               u8     proto;           /* IP protocol or lower 8 bits of ARP opcode. */
> +               u8     tos;             /* IP ToS. */
> +               u8     ttl;             /* IP TTL/hop limit. */
> +               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
> +       } ip;
> +       struct {
> +               __be16 src;             /* TCP/UDP/SCTP source port. */
> +               __be16 dst;             /* TCP/UDP/SCTP destination port. */
> +               __be16 flags;           /* TCP flags. */
> +       } tp;
> +       union {
> +               struct {
> +                       struct {
> +                               __be32 src;     /* IP source address. */
> +                               __be32 dst;     /* IP destination address. */
> +                       } addr;
> +                       struct {
> +                               u8 sha[ETH_ALEN];       /* ARP source hardware address. */
> +                               u8 tha[ETH_ALEN];       /* ARP target hardware address. */
> +                       } arp;
> +               } ipv4;
> +               struct {
> +                       struct {
> +                               struct in6_addr src;    /* IPv6 source address. */
> +                               struct in6_addr dst;    /* IPv6 destination address. */
> +                       } addr;
> +                       __be32 label;                   /* IPv6 flow label. */
> +                       struct {
> +                               struct in6_addr target; /* ND target address. */
> +                               u8 sll[ETH_ALEN];       /* ND source link layer address. */
> +                               u8 tll[ETH_ALEN];       /* ND target link layer address. */
> +                       } nd;
> +               } ipv6;
> +       };
> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
> +

HW offload API should be separate from OVS module. This has following
advantages.
1. It can be managed by OVS userspace vswitchd process which has much
better context to setup hardware flow table. Once we add capabilities
for swdev, it is much more easier for vswitchd process to choose
correct (hw or sw) flow table for given flow.
2. Other application that wants to use HW offload does not have
dependency on OVS kernel module.
3. Hardware and software datapath remains separate, these two
components has no dependency on each other, both can be developed
independent of each other.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pravin B Shelar Sept. 3, 2014, 6:42 p.m. UTC | #3
On Wed, Sep 3, 2014 at 8:20 AM, John Fastabend <john.fastabend@gmail.com> wrote:
> On 09/03/2014 02:24 AM, Jiri Pirko wrote:
>>
>> After this, flow related structures can be used in other code.
>>
>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>> ---
>
>
> Hi Jiri,
>
> As I indicated before I'm looking into integrating this with some
> hardware here. Progress is a bit slow but starting to look at it.The
> i40e/ixgbe driver being one open source example with very limited
> support for tables, flow matches, etc. And then a closed source driver
> with much more flexibility. What I don't have is a middle of the road
> switch to work with something better then a host nic but not as
> flexible as a TOR.
>
> Couple questions my assumption here is I can extend the flow_key
> as needed to support additional match criteria my hardware has.
> I scanned the ./net/openvswitch source and I didn't catch any
> place that would break but might need to take a closer look.
> Similarly the actions set will need to be extended. For example
> if I want to use this with i40e a OVS_ACTION_ATTR_QUEUE could
> be used to steer packets to the queue. With this in mind we
> will want a follow up patch to rename OVS_ACTION_ATTR_* to
> FLOW_ACTION_ATTR_*
>

struct sw_flow_key is internal structure of OVS, it is designed to
have better flow-table performance. By adding hw specific fields in
sw_flow_key, it increase flow-key size and that has negative impact on
OVS software switching performance. Therefore it is better not to
share this internal structure with driver interface.

Thanks.

> Also I have some filters that can match on offset/length/mask
> tuples. As far as I can tell this is going to have to be yet
> another interface? Or would it be worth the effort to define
> the flow key more generically. My initial guess is I'll just
> write a separate interface. I think this is what Jamal referred
> to as another "classifier".
>
> Thanks,
> John
>
> [...]
>
>
>> +
>> +struct sw_flow_key_ipv4_tunnel {
>> +       __be64 tun_id;
>> +       __be32 ipv4_src;
>> +       __be32 ipv4_dst;
>> +       __be16 tun_flags;
>> +       u8   ipv4_tos;
>> +       u8   ipv4_ttl;
>> +};
>> +
>> +struct sw_flow_key {
>> +       struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel
>> key. */
>> +       struct {
>> +               u32     priority;       /* Packet QoS priority. */
>> +               u32     skb_mark;       /* SKB mark. */
>> +               u16     in_port;        /* Input switch port (or
>> DP_MAX_PORTS). */
>> +       } __packed phy; /* Safe when right after 'tun_key'. */
>> +       struct {
>> +               u8     src[ETH_ALEN];   /* Ethernet source address. */
>> +               u8     dst[ETH_ALEN];   /* Ethernet destination address.
>> */
>> +               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT
>> set otherwise. */
>> +               __be16 type;            /* Ethernet frame type. */
>> +       } eth;
>> +       struct {
>> +               u8     proto;           /* IP protocol or lower 8 bits of
>> ARP opcode. */
>> +               u8     tos;             /* IP ToS. */
>> +               u8     ttl;             /* IP TTL/hop limit. */
>> +               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
>> +       } ip;
>> +       struct {
>> +               __be16 src;             /* TCP/UDP/SCTP source port. */
>> +               __be16 dst;             /* TCP/UDP/SCTP destination port.
>> */
>> +               __be16 flags;           /* TCP flags. */
>> +       } tp;
>> +       union {
>> +               struct {
>> +                       struct {
>> +                               __be32 src;     /* IP source address. */
>> +                               __be32 dst;     /* IP destination address.
>> */
>> +                       } addr;
>> +                       struct {
>> +                               u8 sha[ETH_ALEN];       /* ARP source
>> hardware address. */
>> +                               u8 tha[ETH_ALEN];       /* ARP target
>> hardware address. */
>> +                       } arp;
>> +               } ipv4;
>> +               struct {
>> +                       struct {
>> +                               struct in6_addr src;    /* IPv6 source
>> address. */
>> +                               struct in6_addr dst;    /* IPv6
>> destination address. */
>> +                       } addr;
>> +                       __be32 label;                   /* IPv6 flow
>> label. */
>> +                       struct {
>> +                               struct in6_addr target; /* ND target
>> address. */
>> +                               u8 sll[ETH_ALEN];       /* ND source link
>> layer address. */
>> +                               u8 tll[ETH_ALEN];       /* ND target link
>> layer address. */
>> +                       } nd;
>> +               } ipv6;
>> +       };
>> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as
>> longs. */
>> +
>> +struct sw_flow_key_range {
>> +       unsigned short int start;
>> +       unsigned short int end;
>> +};
>> +
>> +struct sw_flow_mask {
>> +       struct sw_flow_key_range range;
>> +       struct sw_flow_key key;
>> +};
>> +
>> +struct sw_flow_action {
>> +};
>> +
>> +struct sw_flow_actions {
>> +       unsigned count;
>> +       struct sw_flow_action actions[0];
>> +};
>> +
>> +struct sw_flow {
>> +       struct sw_flow_key key;
>> +       struct sw_flow_key unmasked_key;
>> +       struct sw_flow_mask *mask;
>> +       struct sw_flow_actions *actions;
>> +};
>> +
>
>
>
> --
> John Fastabend         Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jamal Hadi Salim Sept. 3, 2014, 9:11 p.m. UTC | #4
On 09/03/14 11:20, John Fastabend wrote:

> Also I have some filters that can match on offset/length/mask
> tuples. As far as I can tell this is going to have to be yet
> another interface? Or would it be worth the effort to define
> the flow key more generically. My initial guess is I'll just
> write a separate interface. I think this is what Jamal referred
> to as another "classifier".
>

Exactly. I have more complex classifiers as stated earlier.
I am afraid these patches again are not satisfying that need.

In any case - we are taking a different tact than these patches
do and hopefully at some point we can merge thoughts.

cheers,
jamal

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jamal Hadi Salim Sept. 3, 2014, 9:22 p.m. UTC | #5
On 09/03/14 14:41, Pravin Shelar wrote:
> On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko <jiri@resnulli.us> wrote:

> HW offload API should be separate from OVS module.

The above part i agree with. Infact it is very odd that it seems
hard to get this point across ;->

> This has following
> advantages.
> 1. It can be managed by OVS userspace vswitchd process which has much
> better context to setup hardware flow table. Once we add capabilities
> for swdev, it is much more easier for vswitchd process to choose
> correct (hw or sw) flow table for given flow.

This i disagree with.
The desire is to have existing user tools to work with offloads.
When necessary, we then create new tools.
Existing tools may need to be taught to do selectively do
hardware vs software offload. We have a precedence with
bridging code which selectively offloads to hardware using iproute2.

> 2. Other application that wants to use HW offload does not have
> dependency on OVS kernel module.

Or on OF for that matter.

> 3. Hardware and software datapath remains separate, these two
> components has no dependency on each other, both can be developed
> independent of each other.
>

The basic definition of "offload" implies dependency;-> So,
I strongly disagree. You may need to go backwards and look at
views expressed on this (other than emails - theres slideware).

cheers,
jamal



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pravin B Shelar Sept. 3, 2014, 9:59 p.m. UTC | #6
On Wed, Sep 3, 2014 at 2:22 PM, Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> On 09/03/14 14:41, Pravin Shelar wrote:
>>
>> On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>
>
>> HW offload API should be separate from OVS module.
>
>
> The above part i agree with. Infact it is very odd that it seems
> hard to get this point across ;->
>
>
>> This has following
>> advantages.
>> 1. It can be managed by OVS userspace vswitchd process which has much
>> better context to setup hardware flow table. Once we add capabilities
>> for swdev, it is much more easier for vswitchd process to choose
>> correct (hw or sw) flow table for given flow.
>
>
> This i disagree with.
> The desire is to have existing user tools to work with offloads.
> When necessary, we then create new tools.
> Existing tools may need to be taught to do selectively do
> hardware vs software offload. We have a precedence with
> bridging code which selectively offloads to hardware using iproute2.
>
Both of us are saying same thing.
What I meant was for OVS use-case, where OVS wants to use offload for
switching flows, vswitchd userspace process can program HW offload
using kernel HW offload APIs directly from userspace, rather than
going through OVS kernel module. If user wants to use some other tool,
then the tool can use same kernel HW offload APIs.

>
>> 2. Other application that wants to use HW offload does not have
>> dependency on OVS kernel module.
>
>
> Or on OF for that matter.
>
>
>> 3. Hardware and software datapath remains separate, these two
>> components has no dependency on each other, both can be developed
>> independent of each other.
>>
>
> The basic definition of "offload" implies dependency;-> So,
> I strongly disagree. You may need to go backwards and look at
> views expressed on this (other than emails - theres slideware).
>

I was referring to code dependency in kernel. For example ovs flow-key
structure used. This complicates OVS internal structure which needs to
be shared plus OVS might need to extend interface for configuring HW
match or action that does not exist in OVS software datapath.

I agree these two components are related and that dependency can be
handled from userspace.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jamal Hadi Salim Sept. 4, 2014, 1:54 a.m. UTC | #7
On 09/03/14 17:59, Pravin Shelar wrote:

> Both of us are saying same thing.
> What I meant was for OVS use-case, where OVS wants to use offload for
> switching flows, vswitchd userspace process can program HW offload
> using kernel HW offload APIs directly from userspace, rather than
> going through OVS kernel module. If user wants to use some other tool,
> then the tool can use same kernel HW offload APIs.

Ok, sorry, you are right - we are saying the same thing.

cheers,
jamal
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Sept. 4, 2014, 12:09 p.m. UTC | #8
Wed, Sep 03, 2014 at 05:20:25PM CEST, john.fastabend@gmail.com wrote:
>On 09/03/2014 02:24 AM, Jiri Pirko wrote:
>>After this, flow related structures can be used in other code.
>>
>>Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>>---
>
>Hi Jiri,
>
>As I indicated before I'm looking into integrating this with some
>hardware here. Progress is a bit slow but starting to look at it.The
>i40e/ixgbe driver being one open source example with very limited
>support for tables, flow matches, etc. And then a closed source driver
>with much more flexibility. What I don't have is a middle of the road
>switch to work with something better then a host nic but not as
>flexible as a TOR.
>
>Couple questions my assumption here is I can extend the flow_key
>as needed to support additional match criteria my hardware has.
>I scanned the ./net/openvswitch source and I didn't catch any
>place that would break but might need to take a closer look.
>Similarly the actions set will need to be extended. For example
>if I want to use this with i40e a OVS_ACTION_ATTR_QUEUE could
>be used to steer packets to the queue. With this in mind we
>will want a follow up patch to rename OVS_ACTION_ATTR_* to
>FLOW_ACTION_ATTR_*
>
>Also I have some filters that can match on offset/length/mask
>tuples. As far as I can tell this is going to have to be yet
>another interface? Or would it be worth the effort to define
>the flow key more generically. My initial guess is I'll just
>write a separate interface. I think this is what Jamal referred
>to as another "classifier".

I'm thinking about using some more generic match key. It would
incorporate ovs key and possible other classifiers (as your
off/len/mask) as well. Drivers will have free will to implement whatever
the hw supports.

Will do it for the next version of the patchset (most probably after I
return from holliday, Sep 15).


>
>Thanks,
>John
>
>[...]
>
>>+
>>+struct sw_flow_key_ipv4_tunnel {
>>+	__be64 tun_id;
>>+	__be32 ipv4_src;
>>+	__be32 ipv4_dst;
>>+	__be16 tun_flags;
>>+	u8   ipv4_tos;
>>+	u8   ipv4_ttl;
>>+};
>>+
>>+struct sw_flow_key {
>>+	struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
>>+	struct {
>>+		u32	priority;	/* Packet QoS priority. */
>>+		u32	skb_mark;	/* SKB mark. */
>>+		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
>>+	} __packed phy; /* Safe when right after 'tun_key'. */
>>+	struct {
>>+		u8     src[ETH_ALEN];	/* Ethernet source address. */
>>+		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
>>+		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
>>+		__be16 type;		/* Ethernet frame type. */
>>+	} eth;
>>+	struct {
>>+		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
>>+		u8     tos;		/* IP ToS. */
>>+		u8     ttl;		/* IP TTL/hop limit. */
>>+		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
>>+	} ip;
>>+	struct {
>>+		__be16 src;		/* TCP/UDP/SCTP source port. */
>>+		__be16 dst;		/* TCP/UDP/SCTP destination port. */
>>+		__be16 flags;		/* TCP flags. */
>>+	} tp;
>>+	union {
>>+		struct {
>>+			struct {
>>+				__be32 src;	/* IP source address. */
>>+				__be32 dst;	/* IP destination address. */
>>+			} addr;
>>+			struct {
>>+				u8 sha[ETH_ALEN];	/* ARP source hardware address. */
>>+				u8 tha[ETH_ALEN];	/* ARP target hardware address. */
>>+			} arp;
>>+		} ipv4;
>>+		struct {
>>+			struct {
>>+				struct in6_addr src;	/* IPv6 source address. */
>>+				struct in6_addr dst;	/* IPv6 destination address. */
>>+			} addr;
>>+			__be32 label;			/* IPv6 flow label. */
>>+			struct {
>>+				struct in6_addr target;	/* ND target address. */
>>+				u8 sll[ETH_ALEN];	/* ND source link layer address. */
>>+				u8 tll[ETH_ALEN];	/* ND target link layer address. */
>>+			} nd;
>>+		} ipv6;
>>+	};
>>+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
>>+
>>+struct sw_flow_key_range {
>>+	unsigned short int start;
>>+	unsigned short int end;
>>+};
>>+
>>+struct sw_flow_mask {
>>+	struct sw_flow_key_range range;
>>+	struct sw_flow_key key;
>>+};
>>+
>>+struct sw_flow_action {
>>+};
>>+
>>+struct sw_flow_actions {
>>+	unsigned count;
>>+	struct sw_flow_action actions[0];
>>+};
>>+
>>+struct sw_flow {
>>+	struct sw_flow_key key;
>>+	struct sw_flow_key unmasked_key;
>>+	struct sw_flow_mask *mask;
>>+	struct sw_flow_actions *actions;
>>+};
>>+
>
>
>-- 
>John Fastabend         Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Sept. 4, 2014, 12:25 p.m. UTC | #9
Wed, Sep 03, 2014 at 08:42:18PM CEST, pshelar@nicira.com wrote:
>On Wed, Sep 3, 2014 at 8:20 AM, John Fastabend <john.fastabend@gmail.com> wrote:
>> On 09/03/2014 02:24 AM, Jiri Pirko wrote:
>>>
>>> After this, flow related structures can be used in other code.
>>>
>>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>>> ---
>>
>>
>> Hi Jiri,
>>
>> As I indicated before I'm looking into integrating this with some
>> hardware here. Progress is a bit slow but starting to look at it.The
>> i40e/ixgbe driver being one open source example with very limited
>> support for tables, flow matches, etc. And then a closed source driver
>> with much more flexibility. What I don't have is a middle of the road
>> switch to work with something better then a host nic but not as
>> flexible as a TOR.
>>
>> Couple questions my assumption here is I can extend the flow_key
>> as needed to support additional match criteria my hardware has.
>> I scanned the ./net/openvswitch source and I didn't catch any
>> place that would break but might need to take a closer look.
>> Similarly the actions set will need to be extended. For example
>> if I want to use this with i40e a OVS_ACTION_ATTR_QUEUE could
>> be used to steer packets to the queue. With this in mind we
>> will want a follow up patch to rename OVS_ACTION_ATTR_* to
>> FLOW_ACTION_ATTR_*
>>
>
>struct sw_flow_key is internal structure of OVS, it is designed to
>have better flow-table performance. By adding hw specific fields in
>sw_flow_key, it increase flow-key size and that has negative impact on
>OVS software switching performance. Therefore it is better not to
>share this internal structure with driver interface.

Ok. I will split this leaving the sw_flow_key into ovs and introducing
new one. Thanks.

>
>Thanks.
>
>> Also I have some filters that can match on offset/length/mask
>> tuples. As far as I can tell this is going to have to be yet
>> another interface? Or would it be worth the effort to define
>> the flow key more generically. My initial guess is I'll just
>> write a separate interface. I think this is what Jamal referred
>> to as another "classifier".
>>
>> Thanks,
>> John
>>
>> [...]
>>
>>
>>> +
>>> +struct sw_flow_key_ipv4_tunnel {
>>> +       __be64 tun_id;
>>> +       __be32 ipv4_src;
>>> +       __be32 ipv4_dst;
>>> +       __be16 tun_flags;
>>> +       u8   ipv4_tos;
>>> +       u8   ipv4_ttl;
>>> +};
>>> +
>>> +struct sw_flow_key {
>>> +       struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel
>>> key. */
>>> +       struct {
>>> +               u32     priority;       /* Packet QoS priority. */
>>> +               u32     skb_mark;       /* SKB mark. */
>>> +               u16     in_port;        /* Input switch port (or
>>> DP_MAX_PORTS). */
>>> +       } __packed phy; /* Safe when right after 'tun_key'. */
>>> +       struct {
>>> +               u8     src[ETH_ALEN];   /* Ethernet source address. */
>>> +               u8     dst[ETH_ALEN];   /* Ethernet destination address.
>>> */
>>> +               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT
>>> set otherwise. */
>>> +               __be16 type;            /* Ethernet frame type. */
>>> +       } eth;
>>> +       struct {
>>> +               u8     proto;           /* IP protocol or lower 8 bits of
>>> ARP opcode. */
>>> +               u8     tos;             /* IP ToS. */
>>> +               u8     ttl;             /* IP TTL/hop limit. */
>>> +               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
>>> +       } ip;
>>> +       struct {
>>> +               __be16 src;             /* TCP/UDP/SCTP source port. */
>>> +               __be16 dst;             /* TCP/UDP/SCTP destination port.
>>> */
>>> +               __be16 flags;           /* TCP flags. */
>>> +       } tp;
>>> +       union {
>>> +               struct {
>>> +                       struct {
>>> +                               __be32 src;     /* IP source address. */
>>> +                               __be32 dst;     /* IP destination address.
>>> */
>>> +                       } addr;
>>> +                       struct {
>>> +                               u8 sha[ETH_ALEN];       /* ARP source
>>> hardware address. */
>>> +                               u8 tha[ETH_ALEN];       /* ARP target
>>> hardware address. */
>>> +                       } arp;
>>> +               } ipv4;
>>> +               struct {
>>> +                       struct {
>>> +                               struct in6_addr src;    /* IPv6 source
>>> address. */
>>> +                               struct in6_addr dst;    /* IPv6
>>> destination address. */
>>> +                       } addr;
>>> +                       __be32 label;                   /* IPv6 flow
>>> label. */
>>> +                       struct {
>>> +                               struct in6_addr target; /* ND target
>>> address. */
>>> +                               u8 sll[ETH_ALEN];       /* ND source link
>>> layer address. */
>>> +                               u8 tll[ETH_ALEN];       /* ND target link
>>> layer address. */
>>> +                       } nd;
>>> +               } ipv6;
>>> +       };
>>> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as
>>> longs. */
>>> +
>>> +struct sw_flow_key_range {
>>> +       unsigned short int start;
>>> +       unsigned short int end;
>>> +};
>>> +
>>> +struct sw_flow_mask {
>>> +       struct sw_flow_key_range range;
>>> +       struct sw_flow_key key;
>>> +};
>>> +
>>> +struct sw_flow_action {
>>> +};
>>> +
>>> +struct sw_flow_actions {
>>> +       unsigned count;
>>> +       struct sw_flow_action actions[0];
>>> +};
>>> +
>>> +struct sw_flow {
>>> +       struct sw_flow_key key;
>>> +       struct sw_flow_key unmasked_key;
>>> +       struct sw_flow_mask *mask;
>>> +       struct sw_flow_actions *actions;
>>> +};
>>> +
>>
>>
>>
>> --
>> John Fastabend         Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Sept. 4, 2014, 12:33 p.m. UTC | #10
Wed, Sep 03, 2014 at 08:41:39PM CEST, pshelar@nicira.com wrote:
>On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>> After this, flow related structures can be used in other code.
>>
>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>> ---
>>  include/net/sw_flow.h          |  99 ++++++++++++++++++++++++++++++++++
>>  net/openvswitch/actions.c      |   3 +-
>>  net/openvswitch/datapath.c     |  74 +++++++++++++-------------
>>  net/openvswitch/datapath.h     |   4 +-
>>  net/openvswitch/flow.c         |   6 +--
>>  net/openvswitch/flow.h         | 102 +++++++----------------------------
>>  net/openvswitch/flow_netlink.c |  53 +++++++++---------
>>  net/openvswitch/flow_netlink.h |  10 ++--
>>  net/openvswitch/flow_table.c   | 118 ++++++++++++++++++++++-------------------
>>  net/openvswitch/flow_table.h   |  30 +++++------
>>  net/openvswitch/vport-gre.c    |   4 +-
>>  net/openvswitch/vport-vxlan.c  |   2 +-
>>  net/openvswitch/vport.c        |   2 +-
>>  net/openvswitch/vport.h        |   2 +-
>>  14 files changed, 276 insertions(+), 233 deletions(-)
>>  create mode 100644 include/net/sw_flow.h
>>
>> diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
>> new file mode 100644
>> index 0000000..21724f1
>> --- /dev/null
>> +++ b/include/net/sw_flow.h
>> @@ -0,0 +1,99 @@
>> +/*
>> + * include/net/sw_flow.h - Generic switch flow structures
>> + * Copyright (c) 2007-2012 Nicira, Inc.
>> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + */
>> +
>> +#ifndef _NET_SW_FLOW_H_
>> +#define _NET_SW_FLOW_H_
>> +
>> +struct sw_flow_key_ipv4_tunnel {
>> +       __be64 tun_id;
>> +       __be32 ipv4_src;
>> +       __be32 ipv4_dst;
>> +       __be16 tun_flags;
>> +       u8   ipv4_tos;
>> +       u8   ipv4_ttl;
>> +};
>> +
>> +struct sw_flow_key {
>> +       struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
>> +       struct {
>> +               u32     priority;       /* Packet QoS priority. */
>> +               u32     skb_mark;       /* SKB mark. */
>> +               u16     in_port;        /* Input switch port (or DP_MAX_PORTS). */
>> +       } __packed phy; /* Safe when right after 'tun_key'. */
>> +       struct {
>> +               u8     src[ETH_ALEN];   /* Ethernet source address. */
>> +               u8     dst[ETH_ALEN];   /* Ethernet destination address. */
>> +               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
>> +               __be16 type;            /* Ethernet frame type. */
>> +       } eth;
>> +       struct {
>> +               u8     proto;           /* IP protocol or lower 8 bits of ARP opcode. */
>> +               u8     tos;             /* IP ToS. */
>> +               u8     ttl;             /* IP TTL/hop limit. */
>> +               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
>> +       } ip;
>> +       struct {
>> +               __be16 src;             /* TCP/UDP/SCTP source port. */
>> +               __be16 dst;             /* TCP/UDP/SCTP destination port. */
>> +               __be16 flags;           /* TCP flags. */
>> +       } tp;
>> +       union {
>> +               struct {
>> +                       struct {
>> +                               __be32 src;     /* IP source address. */
>> +                               __be32 dst;     /* IP destination address. */
>> +                       } addr;
>> +                       struct {
>> +                               u8 sha[ETH_ALEN];       /* ARP source hardware address. */
>> +                               u8 tha[ETH_ALEN];       /* ARP target hardware address. */
>> +                       } arp;
>> +               } ipv4;
>> +               struct {
>> +                       struct {
>> +                               struct in6_addr src;    /* IPv6 source address. */
>> +                               struct in6_addr dst;    /* IPv6 destination address. */
>> +                       } addr;
>> +                       __be32 label;                   /* IPv6 flow label. */
>> +                       struct {
>> +                               struct in6_addr target; /* ND target address. */
>> +                               u8 sll[ETH_ALEN];       /* ND source link layer address. */
>> +                               u8 tll[ETH_ALEN];       /* ND target link layer address. */
>> +                       } nd;
>> +               } ipv6;
>> +       };
>> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
>> +
>
>HW offload API should be separate from OVS module. This has following
>advantages.
>1. It can be managed by OVS userspace vswitchd process which has much
>better context to setup hardware flow table. Once we add capabilities
>for swdev, it is much more easier for vswitchd process to choose
>correct (hw or sw) flow table for given flow.

The idea is to add a nl attr in ovs genl iface so the vswitchd can
speficify the flow the to be in sw only, in hw only, in both.
I believe that is is more convenient to let switchd to communicate flows
via single iface.

>2. Other application that wants to use HW offload does not have
>dependency on OVS kernel module.

That is not the case for this patchset. Userspace can insert/remove
flows using the switchdev generic netlink api - see:
[patch net-next 13/13] switchdev: introduce Netlink API

>3. Hardware and software datapath remains separate, these two
>components has no dependency on each other, both can be developed
>independent of each other.


The general idea is to have the offloads handled in-kernel. Therefore I
hooked on to ovs kernel dp code.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pravin B Shelar Sept. 4, 2014, 8:46 p.m. UTC | #11
On Thu, Sep 4, 2014 at 5:33 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> Wed, Sep 03, 2014 at 08:41:39PM CEST, pshelar@nicira.com wrote:
>>On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>>> After this, flow related structures can be used in other code.
>>>
>>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>>> ---
>>>  include/net/sw_flow.h          |  99 ++++++++++++++++++++++++++++++++++
>>>  net/openvswitch/actions.c      |   3 +-
>>>  net/openvswitch/datapath.c     |  74 +++++++++++++-------------
>>>  net/openvswitch/datapath.h     |   4 +-
>>>  net/openvswitch/flow.c         |   6 +--
>>>  net/openvswitch/flow.h         | 102 +++++++----------------------------
>>>  net/openvswitch/flow_netlink.c |  53 +++++++++---------
>>>  net/openvswitch/flow_netlink.h |  10 ++--
>>>  net/openvswitch/flow_table.c   | 118 ++++++++++++++++++++++-------------------
>>>  net/openvswitch/flow_table.h   |  30 +++++------
>>>  net/openvswitch/vport-gre.c    |   4 +-
>>>  net/openvswitch/vport-vxlan.c  |   2 +-
>>>  net/openvswitch/vport.c        |   2 +-
>>>  net/openvswitch/vport.h        |   2 +-
>>>  14 files changed, 276 insertions(+), 233 deletions(-)
>>>  create mode 100644 include/net/sw_flow.h
>>>
>>> diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
>>> new file mode 100644
>>> index 0000000..21724f1
>>> --- /dev/null
>>> +++ b/include/net/sw_flow.h
>>> @@ -0,0 +1,99 @@
>>> +/*
>>> + * include/net/sw_flow.h - Generic switch flow structures
>>> + * Copyright (c) 2007-2012 Nicira, Inc.
>>> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License as published by
>>> + * the Free Software Foundation; either version 2 of the License, or
>>> + * (at your option) any later version.
>>> + */
>>> +
>>> +#ifndef _NET_SW_FLOW_H_
>>> +#define _NET_SW_FLOW_H_
>>> +
>>> +struct sw_flow_key_ipv4_tunnel {
>>> +       __be64 tun_id;
>>> +       __be32 ipv4_src;
>>> +       __be32 ipv4_dst;
>>> +       __be16 tun_flags;
>>> +       u8   ipv4_tos;
>>> +       u8   ipv4_ttl;
>>> +};
>>> +
>>> +struct sw_flow_key {
>>> +       struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
>>> +       struct {
>>> +               u32     priority;       /* Packet QoS priority. */
>>> +               u32     skb_mark;       /* SKB mark. */
>>> +               u16     in_port;        /* Input switch port (or DP_MAX_PORTS). */
>>> +       } __packed phy; /* Safe when right after 'tun_key'. */
>>> +       struct {
>>> +               u8     src[ETH_ALEN];   /* Ethernet source address. */
>>> +               u8     dst[ETH_ALEN];   /* Ethernet destination address. */
>>> +               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
>>> +               __be16 type;            /* Ethernet frame type. */
>>> +       } eth;
>>> +       struct {
>>> +               u8     proto;           /* IP protocol or lower 8 bits of ARP opcode. */
>>> +               u8     tos;             /* IP ToS. */
>>> +               u8     ttl;             /* IP TTL/hop limit. */
>>> +               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
>>> +       } ip;
>>> +       struct {
>>> +               __be16 src;             /* TCP/UDP/SCTP source port. */
>>> +               __be16 dst;             /* TCP/UDP/SCTP destination port. */
>>> +               __be16 flags;           /* TCP flags. */
>>> +       } tp;
>>> +       union {
>>> +               struct {
>>> +                       struct {
>>> +                               __be32 src;     /* IP source address. */
>>> +                               __be32 dst;     /* IP destination address. */
>>> +                       } addr;
>>> +                       struct {
>>> +                               u8 sha[ETH_ALEN];       /* ARP source hardware address. */
>>> +                               u8 tha[ETH_ALEN];       /* ARP target hardware address. */
>>> +                       } arp;
>>> +               } ipv4;
>>> +               struct {
>>> +                       struct {
>>> +                               struct in6_addr src;    /* IPv6 source address. */
>>> +                               struct in6_addr dst;    /* IPv6 destination address. */
>>> +                       } addr;
>>> +                       __be32 label;                   /* IPv6 flow label. */
>>> +                       struct {
>>> +                               struct in6_addr target; /* ND target address. */
>>> +                               u8 sll[ETH_ALEN];       /* ND source link layer address. */
>>> +                               u8 tll[ETH_ALEN];       /* ND target link layer address. */
>>> +                       } nd;
>>> +               } ipv6;
>>> +       };
>>> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
>>> +
>>
>>HW offload API should be separate from OVS module. This has following
>>advantages.
>>1. It can be managed by OVS userspace vswitchd process which has much
>>better context to setup hardware flow table. Once we add capabilities
>>for swdev, it is much more easier for vswitchd process to choose
>>correct (hw or sw) flow table for given flow.
>
> The idea is to add a nl attr in ovs genl iface so the vswitchd can
> speficify the flow the to be in sw only, in hw only, in both.
> I believe that is is more convenient to let switchd to communicate flows
> via single iface.
>
How is it convenient? this patch complicates OVS kernel module. It add
OVS interfaces for HW offload. And you need similar interfaces for
switchdev device. So it duplicate code.
On the other hand if vswitchd uses common interface (switchdev) there
is no need to extend ovs kernel interface. For example specifying
extra metadata, like (sw only, hw olny, both).

>>2. Other application that wants to use HW offload does not have
>>dependency on OVS kernel module.
>
> That is not the case for this patchset. Userspace can insert/remove
> flows using the switchdev generic netlink api - see:
> [patch net-next 13/13] switchdev: introduce Netlink API
>
>>3. Hardware and software datapath remains separate, these two
>>components has no dependency on each other, both can be developed
>>independent of each other.
>
>
> The general idea is to have the offloads handled in-kernel. Therefore I
> hooked on to ovs kernel dp code.
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Sept. 17, 2014, 8:34 a.m. UTC | #12
Thu, Sep 04, 2014 at 10:46:28PM CEST, pshelar@nicira.com wrote:
>On Thu, Sep 4, 2014 at 5:33 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>> Wed, Sep 03, 2014 at 08:41:39PM CEST, pshelar@nicira.com wrote:
>>>On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>>>> After this, flow related structures can be used in other code.
>>>>
>>>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>>>> ---
>>>>  include/net/sw_flow.h          |  99 ++++++++++++++++++++++++++++++++++
>>>>  net/openvswitch/actions.c      |   3 +-
>>>>  net/openvswitch/datapath.c     |  74 +++++++++++++-------------
>>>>  net/openvswitch/datapath.h     |   4 +-
>>>>  net/openvswitch/flow.c         |   6 +--
>>>>  net/openvswitch/flow.h         | 102 +++++++----------------------------
>>>>  net/openvswitch/flow_netlink.c |  53 +++++++++---------
>>>>  net/openvswitch/flow_netlink.h |  10 ++--
>>>>  net/openvswitch/flow_table.c   | 118 ++++++++++++++++++++++-------------------
>>>>  net/openvswitch/flow_table.h   |  30 +++++------
>>>>  net/openvswitch/vport-gre.c    |   4 +-
>>>>  net/openvswitch/vport-vxlan.c  |   2 +-
>>>>  net/openvswitch/vport.c        |   2 +-
>>>>  net/openvswitch/vport.h        |   2 +-
>>>>  14 files changed, 276 insertions(+), 233 deletions(-)
>>>>  create mode 100644 include/net/sw_flow.h
>>>>
>>>> diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
>>>> new file mode 100644
>>>> index 0000000..21724f1
>>>> --- /dev/null
>>>> +++ b/include/net/sw_flow.h
>>>> @@ -0,0 +1,99 @@
>>>> +/*
>>>> + * include/net/sw_flow.h - Generic switch flow structures
>>>> + * Copyright (c) 2007-2012 Nicira, Inc.
>>>> + * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or modify
>>>> + * it under the terms of the GNU General Public License as published by
>>>> + * the Free Software Foundation; either version 2 of the License, or
>>>> + * (at your option) any later version.
>>>> + */
>>>> +
>>>> +#ifndef _NET_SW_FLOW_H_
>>>> +#define _NET_SW_FLOW_H_
>>>> +
>>>> +struct sw_flow_key_ipv4_tunnel {
>>>> +       __be64 tun_id;
>>>> +       __be32 ipv4_src;
>>>> +       __be32 ipv4_dst;
>>>> +       __be16 tun_flags;
>>>> +       u8   ipv4_tos;
>>>> +       u8   ipv4_ttl;
>>>> +};
>>>> +
>>>> +struct sw_flow_key {
>>>> +       struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
>>>> +       struct {
>>>> +               u32     priority;       /* Packet QoS priority. */
>>>> +               u32     skb_mark;       /* SKB mark. */
>>>> +               u16     in_port;        /* Input switch port (or DP_MAX_PORTS). */
>>>> +       } __packed phy; /* Safe when right after 'tun_key'. */
>>>> +       struct {
>>>> +               u8     src[ETH_ALEN];   /* Ethernet source address. */
>>>> +               u8     dst[ETH_ALEN];   /* Ethernet destination address. */
>>>> +               __be16 tci;             /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
>>>> +               __be16 type;            /* Ethernet frame type. */
>>>> +       } eth;
>>>> +       struct {
>>>> +               u8     proto;           /* IP protocol or lower 8 bits of ARP opcode. */
>>>> +               u8     tos;             /* IP ToS. */
>>>> +               u8     ttl;             /* IP TTL/hop limit. */
>>>> +               u8     frag;            /* One of OVS_FRAG_TYPE_*. */
>>>> +       } ip;
>>>> +       struct {
>>>> +               __be16 src;             /* TCP/UDP/SCTP source port. */
>>>> +               __be16 dst;             /* TCP/UDP/SCTP destination port. */
>>>> +               __be16 flags;           /* TCP flags. */
>>>> +       } tp;
>>>> +       union {
>>>> +               struct {
>>>> +                       struct {
>>>> +                               __be32 src;     /* IP source address. */
>>>> +                               __be32 dst;     /* IP destination address. */
>>>> +                       } addr;
>>>> +                       struct {
>>>> +                               u8 sha[ETH_ALEN];       /* ARP source hardware address. */
>>>> +                               u8 tha[ETH_ALEN];       /* ARP target hardware address. */
>>>> +                       } arp;
>>>> +               } ipv4;
>>>> +               struct {
>>>> +                       struct {
>>>> +                               struct in6_addr src;    /* IPv6 source address. */
>>>> +                               struct in6_addr dst;    /* IPv6 destination address. */
>>>> +                       } addr;
>>>> +                       __be32 label;                   /* IPv6 flow label. */
>>>> +                       struct {
>>>> +                               struct in6_addr target; /* ND target address. */
>>>> +                               u8 sll[ETH_ALEN];       /* ND source link layer address. */
>>>> +                               u8 tll[ETH_ALEN];       /* ND target link layer address. */
>>>> +                       } nd;
>>>> +               } ipv6;
>>>> +       };
>>>> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
>>>> +
>>>
>>>HW offload API should be separate from OVS module. This has following
>>>advantages.
>>>1. It can be managed by OVS userspace vswitchd process which has much
>>>better context to setup hardware flow table. Once we add capabilities
>>>for swdev, it is much more easier for vswitchd process to choose
>>>correct (hw or sw) flow table for given flow.
>>
>> The idea is to add a nl attr in ovs genl iface so the vswitchd can
>> speficify the flow the to be in sw only, in hw only, in both.
>> I believe that is is more convenient to let switchd to communicate flows
>> via single iface.
>>
>How is it convenient? this patch complicates OVS kernel module. It add
>OVS interfaces for HW offload. And you need similar interfaces for
>switchdev device. So it duplicate code.

There is almost no code duplication there. And in next patchset
iteration I plan to have even less.


>On the other hand if vswitchd uses common interface (switchdev) there
>is no need to extend ovs kernel interface. For example specifying
>extra metadata, like (sw only, hw olny, both).

I understand you point of view. However from the offloading perspective
it makes much more sense to push the flows through a single interface
(ovs genl) and only offload selected flows to hw (pushing further).
Having vswitchd to handle 2 different ifaces for the same/similar thing
does not seem like a clean solution to me. And it really breaks the
offloading view.

Plus the amount of code needed to be pushed into ovs kernel dp code in
order to enable this is small.


>
>>>2. Other application that wants to use HW offload does not have
>>>dependency on OVS kernel module.
>>
>> That is not the case for this patchset. Userspace can insert/remove
>> flows using the switchdev generic netlink api - see:
>> [patch net-next 13/13] switchdev: introduce Netlink API
>>
>>>3. Hardware and software datapath remains separate, these two
>>>components has no dependency on each other, both can be developed
>>>independent of each other.
>>
>>
>> The general idea is to have the offloads handled in-kernel. Therefore I
>> hooked on to ovs kernel dp code.
>>
>>
>>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross Sept. 17, 2014, 10:07 p.m. UTC | #13
On Wed, Sep 17, 2014 at 1:34 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> Thu, Sep 04, 2014 at 10:46:28PM CEST, pshelar@nicira.com wrote:
>>On the other hand if vswitchd uses common interface (switchdev) there
>>is no need to extend ovs kernel interface. For example specifying
>>extra metadata, like (sw only, hw olny, both).
>
> I understand you point of view. However from the offloading perspective
> it makes much more sense to push the flows through a single interface
> (ovs genl) and only offload selected flows to hw (pushing further).
> Having vswitchd to handle 2 different ifaces for the same/similar thing
> does not seem like a clean solution to me. And it really breaks the
> offloading view.
>
> Plus the amount of code needed to be pushed into ovs kernel dp code in
> order to enable this is small.

This is missing the point: software forwarding and a hardware driver
interface are doing totally different things. Over time, these will
diverge and you will essentially up with two separate paths packed
together which doesn't help anything. This is not a theoretical
concern as different directions either already exist or have been
proposed. On the software side, there is the BPF proposal which is not
likely to map to hardware any time soon. On the other hand, hardware
is usually composed of multiple tables/functions with varying
capabilities. Sooner or later you will want to take advantage of these
and doing so isn't really possible with the software optimized flows
that the kernel handles. At that point, you will likely introduce a
new interface to userspace to expose this and get flows processed in a
different way.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
new file mode 100644
index 0000000..21724f1
--- /dev/null
+++ b/include/net/sw_flow.h
@@ -0,0 +1,99 @@ 
+/*
+ * include/net/sw_flow.h - Generic switch flow structures
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _NET_SW_FLOW_H_
+#define _NET_SW_FLOW_H_
+
+struct sw_flow_key_ipv4_tunnel {
+	__be64 tun_id;
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+	__be16 tun_flags;
+	u8   ipv4_tos;
+	u8   ipv4_ttl;
+};
+
+struct sw_flow_key {
+	struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
+	struct {
+		u32	priority;	/* Packet QoS priority. */
+		u32	skb_mark;	/* SKB mark. */
+		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
+	} __packed phy; /* Safe when right after 'tun_key'. */
+	struct {
+		u8     src[ETH_ALEN];	/* Ethernet source address. */
+		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
+		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		__be16 type;		/* Ethernet frame type. */
+	} eth;
+	struct {
+		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
+		u8     tos;		/* IP ToS. */
+		u8     ttl;		/* IP TTL/hop limit. */
+		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
+	} ip;
+	struct {
+		__be16 src;		/* TCP/UDP/SCTP source port. */
+		__be16 dst;		/* TCP/UDP/SCTP destination port. */
+		__be16 flags;		/* TCP flags. */
+	} tp;
+	union {
+		struct {
+			struct {
+				__be32 src;	/* IP source address. */
+				__be32 dst;	/* IP destination address. */
+			} addr;
+			struct {
+				u8 sha[ETH_ALEN];	/* ARP source hardware address. */
+				u8 tha[ETH_ALEN];	/* ARP target hardware address. */
+			} arp;
+		} ipv4;
+		struct {
+			struct {
+				struct in6_addr src;	/* IPv6 source address. */
+				struct in6_addr dst;	/* IPv6 destination address. */
+			} addr;
+			__be32 label;			/* IPv6 flow label. */
+			struct {
+				struct in6_addr target;	/* ND target address. */
+				u8 sll[ETH_ALEN];	/* ND source link layer address. */
+				u8 tll[ETH_ALEN];	/* ND target link layer address. */
+			} nd;
+		} ipv6;
+	};
+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
+
+struct sw_flow_key_range {
+	unsigned short int start;
+	unsigned short int end;
+};
+
+struct sw_flow_mask {
+	struct sw_flow_key_range range;
+	struct sw_flow_key key;
+};
+
+struct sw_flow_action {
+};
+
+struct sw_flow_actions {
+	unsigned count;
+	struct sw_flow_action actions[0];
+};
+
+struct sw_flow {
+	struct sw_flow_key key;
+	struct sw_flow_key unmasked_key;
+	struct sw_flow_mask *mask;
+	struct sw_flow_actions *actions;
+};
+
+#endif /* _NET_SW_FLOW_H_ */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 5231652..a044491 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -610,8 +610,9 @@  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 /* Execute a list of actions against 'skb'. */
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
 {
-	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+	struct ovs_flow_actions *acts;
 
+	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
 	OVS_CB(skb)->tun_key = NULL;
 	return do_execute_actions(dp, skb, acts->actions, acts->actions_len);
 }
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7228ec3..683d6cd 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -240,7 +240,7 @@  void ovs_dp_detach_port(struct vport *p)
 void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 {
 	struct datapath *dp = p->dp;
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct dp_stats_percpu *stats;
 	struct sw_flow_key key;
 	u64 *stats_counter;
@@ -505,9 +505,9 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 {
 	struct ovs_header *ovs_header = info->userhdr;
 	struct nlattr **a = info->attrs;
-	struct sw_flow_actions *acts;
+	struct ovs_flow_actions *acts;
 	struct sk_buff *packet;
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct datapath *dp;
 	struct ethhdr *eth;
 	int len;
@@ -544,11 +544,11 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(flow))
 		goto err_kfree_skb;
 
-	err = ovs_flow_extract(packet, -1, &flow->key);
+	err = ovs_flow_extract(packet, -1, &flow->flow.key);
 	if (err)
 		goto err_flow_free;
 
-	err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
+	err = ovs_nla_get_flow_metadata(&flow->flow, a[OVS_PACKET_ATTR_KEY]);
 	if (err)
 		goto err_flow_free;
 	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
@@ -557,15 +557,15 @@  static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		goto err_flow_free;
 
 	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
-				   &flow->key, 0, &acts);
+				   &flow->flow.key, 0, &acts);
 	rcu_assign_pointer(flow->sf_acts, acts);
 	if (err)
 		goto err_flow_free;
 
 	OVS_CB(packet)->flow = flow;
-	OVS_CB(packet)->pkt_key = &flow->key;
-	packet->priority = flow->key.phy.priority;
-	packet->mark = flow->key.phy.skb_mark;
+	OVS_CB(packet)->pkt_key = &flow->flow.key;
+	packet->priority = flow->flow.key.phy.priority;
+	packet->mark = flow->flow.key.phy.skb_mark;
 
 	rcu_read_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -648,7 +648,7 @@  static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
 	}
 }
 
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+static size_t ovs_flow_cmd_msg_size(const struct ovs_flow_actions *acts)
 {
 	return NLMSG_ALIGN(sizeof(struct ovs_header))
 		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
@@ -660,7 +660,7 @@  static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 }
 
 /* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+static int ovs_flow_cmd_fill_info(const struct ovs_flow *flow, int dp_ifindex,
 				  struct sk_buff *skb, u32 portid,
 				  u32 seq, u32 flags, u8 cmd)
 {
@@ -684,7 +684,8 @@  static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 	if (!nla)
 		goto nla_put_failure;
 
-	err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
+	err = ovs_nla_put_flow(&flow->flow.unmasked_key,
+			       &flow->flow.unmasked_key, skb);
 	if (err)
 		goto error;
 	nla_nest_end(skb, nla);
@@ -693,7 +694,7 @@  static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 	if (!nla)
 		goto nla_put_failure;
 
-	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+	err = ovs_nla_put_flow(&flow->flow.key, &flow->flow.mask->key, skb);
 	if (err)
 		goto error;
 
@@ -725,7 +726,7 @@  static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 	 */
 	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
 	if (start) {
-		const struct sw_flow_actions *sf_acts;
+		const struct ovs_flow_actions *sf_acts;
 
 		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
 		err = ovs_nla_put_actions(sf_acts->actions,
@@ -752,9 +753,9 @@  error:
 }
 
 /* May not be called with RCU read lock. */
-static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
-					       struct genl_info *info,
-					       bool always)
+static struct sk_buff *
+ovs_flow_cmd_alloc_info(const struct ovs_flow_actions *acts,
+			struct genl_info *info, bool always)
 {
 	struct sk_buff *skb;
 
@@ -769,7 +770,7 @@  static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
 }
 
 /* Called with ovs_mutex. */
-static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+static struct sk_buff *ovs_flow_cmd_build_info(const struct ovs_flow *flow,
 					       int dp_ifindex,
 					       struct genl_info *info, u8 cmd,
 					       bool always)
@@ -793,12 +794,12 @@  static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
-	struct sw_flow *flow, *new_flow;
+	struct ovs_flow *flow, *new_flow;
 	struct sw_flow_mask mask;
 	struct sk_buff *reply;
 	struct datapath *dp;
-	struct sw_flow_actions *acts;
-	struct sw_flow_match match;
+	struct ovs_flow_actions *acts;
+	struct ovs_flow_match match;
 	int error;
 
 	/* Must have key and actions. */
@@ -818,13 +819,14 @@  static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Extract key. */
-	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+	ovs_match_init(&match, &new_flow->flow.unmasked_key, &mask);
 	error = ovs_nla_get_match(&match,
 				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
 	if (error)
 		goto err_kfree_flow;
 
-	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+	ovs_flow_mask_key(&new_flow->flow.key,
+			  &new_flow->flow.unmasked_key, &mask);
 
 	/* Validate actions. */
 	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
@@ -832,8 +834,8 @@  static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(acts))
 		goto err_kfree_flow;
 
-	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
-				     0, &acts);
+	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+				     &new_flow->flow.key, 0, &acts);
 	if (error) {
 		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
 		goto err_kfree_acts;
@@ -852,7 +854,7 @@  static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock_ovs;
 	}
 	/* Check if this is a duplicate flow */
-	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->flow.unmasked_key);
 	if (likely(!flow)) {
 		rcu_assign_pointer(new_flow->sf_acts, acts);
 
@@ -873,7 +875,7 @@  static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		}
 		ovs_unlock();
 	} else {
-		struct sw_flow_actions *old_acts;
+		struct ovs_flow_actions *old_acts;
 
 		/* Bail out if we're not allowed to modify an existing flow.
 		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
@@ -932,12 +934,12 @@  static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key, masked_key;
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct sw_flow_mask mask;
 	struct sk_buff *reply = NULL;
 	struct datapath *dp;
-	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
-	struct sw_flow_match match;
+	struct ovs_flow_actions *old_acts = NULL, *acts = NULL;
+	struct ovs_flow_match match;
 	int error;
 
 	/* Extract key. */
@@ -1039,9 +1041,9 @@  static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct datapath *dp;
-	struct sw_flow_match match;
+	struct ovs_flow_match match;
 	int err;
 
 	if (!a[OVS_FLOW_ATTR_KEY]) {
@@ -1087,9 +1089,9 @@  static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct datapath *dp;
-	struct sw_flow_match match;
+	struct ovs_flow_match match;
 	int err;
 
 	if (likely(a[OVS_FLOW_ATTR_KEY])) {
@@ -1120,7 +1122,7 @@  static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_flow_tbl_remove(&dp->table, flow);
 	ovs_unlock();
 
-	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+	reply = ovs_flow_cmd_alloc_info((const struct ovs_flow_actions __force *) flow->sf_acts,
 					info, false);
 	if (likely(reply)) {
 		if (likely(!IS_ERR(reply))) {
@@ -1160,7 +1162,7 @@  static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	ti = rcu_dereference(dp->table.ti);
 	for (;;) {
-		struct sw_flow *flow;
+		struct ovs_flow *flow;
 		u32 bucket, obj;
 
 		bucket = cb->args[0];
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 701b573..291f5a0 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -100,9 +100,9 @@  struct datapath {
  * packet is not being tunneled.
  */
 struct ovs_skb_cb {
-	struct sw_flow		*flow;
+	struct ovs_flow		*flow;
 	struct sw_flow_key	*pkt_key;
-	struct ovs_key_ipv4_tunnel  *tun_key;
+	struct sw_flow_key_ipv4_tunnel  *tun_key;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 7064da9..4e2d4c8 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -61,7 +61,7 @@  u64 ovs_flow_used_time(unsigned long flow_jiffies)
 
 #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
 
-void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+void ovs_flow_stats_update(struct ovs_flow *flow, __be16 tcp_flags,
 			   struct sk_buff *skb)
 {
 	struct flow_stats *stats;
@@ -123,7 +123,7 @@  unlock:
 }
 
 /* Must be called with rcu_read_lock or ovs_mutex. */
-void ovs_flow_stats_get(const struct sw_flow *flow,
+void ovs_flow_stats_get(const struct ovs_flow *flow,
 			struct ovs_flow_stats *ovs_stats,
 			unsigned long *used, __be16 *tcp_flags)
 {
@@ -152,7 +152,7 @@  void ovs_flow_stats_get(const struct sw_flow *flow,
 }
 
 /* Called with ovs_mutex. */
-void ovs_flow_stats_clear(struct sw_flow *flow)
+void ovs_flow_stats_clear(struct ovs_flow *flow)
 {
 	int node;
 
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 5e5aaed..712314e 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -32,26 +32,18 @@ 
 #include <linux/time.h>
 #include <linux/flex_array.h>
 #include <net/inet_ecn.h>
+#include <net/sw_flow.h>
 
 struct sk_buff;
 
-/* Used to memset ovs_key_ipv4_tunnel padding. */
+/* Used to memset sw_flow_key_ipv4_tunnel padding. */
 #define OVS_TUNNEL_KEY_SIZE					\
-	(offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) +	\
-	FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
-
-struct ovs_key_ipv4_tunnel {
-	__be64 tun_id;
-	__be32 ipv4_src;
-	__be32 ipv4_dst;
-	__be16 tun_flags;
-	u8   ipv4_tos;
-	u8   ipv4_ttl;
-} __packed __aligned(4); /* Minimize padding. */
-
-static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
-					 const struct iphdr *iph, __be64 tun_id,
-					 __be16 tun_flags)
+	(offsetof(struct sw_flow_key_ipv4_tunnel, ipv4_ttl) +	\
+	FIELD_SIZEOF(struct sw_flow_key_ipv4_tunnel, ipv4_ttl))
+
+static inline void
+ovs_flow_tun_key_init(struct sw_flow_key_ipv4_tunnel *tun_key,
+		      const struct iphdr *iph, __be64 tun_id, __be16 tun_flags)
 {
 	tun_key->tun_id = tun_id;
 	tun_key->ipv4_src = iph->saddr;
@@ -65,76 +57,20 @@  static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
 	       sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
 }
 
-struct sw_flow_key {
-	struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
-	struct {
-		u32	priority;	/* Packet QoS priority. */
-		u32	skb_mark;	/* SKB mark. */
-		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
-	} __packed phy; /* Safe when right after 'tun_key'. */
-	struct {
-		u8     src[ETH_ALEN];	/* Ethernet source address. */
-		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
-		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
-		__be16 type;		/* Ethernet frame type. */
-	} eth;
-	struct {
-		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
-		u8     tos;		/* IP ToS. */
-		u8     ttl;		/* IP TTL/hop limit. */
-		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
-	} ip;
-	struct {
-		__be16 src;		/* TCP/UDP/SCTP source port. */
-		__be16 dst;		/* TCP/UDP/SCTP destination port. */
-		__be16 flags;		/* TCP flags. */
-	} tp;
-	union {
-		struct {
-			struct {
-				__be32 src;	/* IP source address. */
-				__be32 dst;	/* IP destination address. */
-			} addr;
-			struct {
-				u8 sha[ETH_ALEN];	/* ARP source hardware address. */
-				u8 tha[ETH_ALEN];	/* ARP target hardware address. */
-			} arp;
-		} ipv4;
-		struct {
-			struct {
-				struct in6_addr src;	/* IPv6 source address. */
-				struct in6_addr dst;	/* IPv6 destination address. */
-			} addr;
-			__be32 label;			/* IPv6 flow label. */
-			struct {
-				struct in6_addr target;	/* ND target address. */
-				u8 sll[ETH_ALEN];	/* ND source link layer address. */
-				u8 tll[ETH_ALEN];	/* ND target link layer address. */
-			} nd;
-		} ipv6;
-	};
-} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
-
-struct sw_flow_key_range {
-	unsigned short int start;
-	unsigned short int end;
-};
-
-struct sw_flow_mask {
+struct ovs_flow_mask {
 	int ref_count;
 	struct rcu_head rcu;
 	struct list_head list;
-	struct sw_flow_key_range range;
-	struct sw_flow_key key;
+	struct sw_flow_mask mask;
 };
 
-struct sw_flow_match {
+struct ovs_flow_match {
 	struct sw_flow_key *key;
 	struct sw_flow_key_range range;
 	struct sw_flow_mask *mask;
 };
 
-struct sw_flow_actions {
+struct ovs_flow_actions {
 	struct rcu_head rcu;
 	u32 actions_len;
 	struct nlattr actions[];
@@ -148,17 +84,15 @@  struct flow_stats {
 	__be16 tcp_flags;		/* Union of seen TCP flags. */
 };
 
-struct sw_flow {
+struct ovs_flow {
 	struct rcu_head rcu;
 	struct hlist_node hash_node[2];
 	u32 hash;
 	int stats_last_writer;		/* NUMA-node id of the last writer on
 					 * 'stats[0]'.
 					 */
-	struct sw_flow_key key;
-	struct sw_flow_key unmasked_key;
-	struct sw_flow_mask *mask;
-	struct sw_flow_actions __rcu *sf_acts;
+	struct sw_flow flow;
+	struct ovs_flow_actions __rcu *sf_acts;
 	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
 					   * is allocated at flow creation time,
 					   * the rest are allocated on demand
@@ -180,11 +114,11 @@  struct arp_eth_header {
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __packed;
 
-void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+void ovs_flow_stats_update(struct ovs_flow *, __be16 tcp_flags,
 			   struct sk_buff *);
-void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
+void ovs_flow_stats_get(const struct ovs_flow *, struct ovs_flow_stats *,
 			unsigned long *used, __be16 *tcp_flags);
-void ovs_flow_stats_clear(struct sw_flow *);
+void ovs_flow_stats_clear(struct ovs_flow *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
 
 int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d757848..1eb5054 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,7 +48,7 @@ 
 
 #include "flow_netlink.h"
 
-static void update_range__(struct sw_flow_match *match,
+static void update_range__(struct ovs_flow_match *match,
 			   size_t offset, size_t size, bool is_mask)
 {
 	struct sw_flow_key_range *range = NULL;
@@ -105,7 +105,7 @@  static u16 range_n_bytes(const struct sw_flow_key_range *range)
 	return range->end - range->start;
 }
 
-static bool match_validate(const struct sw_flow_match *match,
+static bool match_validate(const struct ovs_flow_match *match,
 			   u64 key_attrs, u64 mask_attrs)
 {
 	u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
@@ -327,7 +327,7 @@  static int parse_flow_nlattrs(const struct nlattr *attr,
 }
 
 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
-				struct sw_flow_match *match, bool is_mask)
+				struct ovs_flow_match *match, bool is_mask)
 {
 	struct nlattr *a;
 	int rem;
@@ -416,8 +416,8 @@  static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 }
 
 static int ipv4_tun_to_nlattr(struct sk_buff *skb,
-			      const struct ovs_key_ipv4_tunnel *tun_key,
-			      const struct ovs_key_ipv4_tunnel *output)
+			      const struct sw_flow_key_ipv4_tunnel *tun_key,
+			      const struct sw_flow_key_ipv4_tunnel *output)
 {
 	struct nlattr *nla;
 
@@ -451,7 +451,7 @@  static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 }
 
 
-static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
+static int metadata_from_nlattrs(struct ovs_flow_match *match,  u64 *attrs,
 				 const struct nlattr **a, bool is_mask)
 {
 	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
@@ -489,7 +489,7 @@  static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 	return 0;
 }
 
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+static int ovs_key_from_nlattrs(struct ovs_flow_match *match, u64 attrs,
 				const struct nlattr **a, bool is_mask)
 {
 	int err;
@@ -730,7 +730,7 @@  static void sw_flow_mask_set(struct sw_flow_mask *mask,
  * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
  * attribute specifies the mask field of the wildcarded flow.
  */
-int ovs_nla_get_match(struct sw_flow_match *match,
+int ovs_nla_get_match(struct ovs_flow_match *match,
 		      const struct nlattr *key,
 		      const struct nlattr *mask)
 {
@@ -849,11 +849,11 @@  int ovs_nla_get_match(struct sw_flow_match *match,
 int ovs_nla_get_flow_metadata(struct sw_flow *flow,
 			      const struct nlattr *attr)
 {
-	struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
+	struct sw_flow_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
 	u64 attrs = 0;
 	int err;
-	struct sw_flow_match match;
+	struct ovs_flow_match match;
 
 	flow->key.phy.in_port = DP_MAX_PORTS;
 	flow->key.phy.priority = 0;
@@ -1070,9 +1070,9 @@  nla_put_failure:
 
 #define MAX_ACTIONS_BUFSIZE	(32 * 1024)
 
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+struct ovs_flow_actions *ovs_nla_alloc_flow_actions(int size)
 {
-	struct sw_flow_actions *sfa;
+	struct ovs_flow_actions *sfa;
 
 	if (size > MAX_ACTIONS_BUFSIZE)
 		return ERR_PTR(-EINVAL);
@@ -1087,19 +1087,19 @@  struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
 
 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
  * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+void ovs_nla_free_flow_actions(struct ovs_flow_actions *sf_acts)
 {
 	kfree_rcu(sf_acts, rcu);
 }
 
-static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+static struct nlattr *reserve_sfa_size(struct ovs_flow_actions **sfa,
 				       int attr_len)
 {
 
-	struct sw_flow_actions *acts;
+	struct ovs_flow_actions *acts;
 	int new_acts_size;
 	int req_size = NLA_ALIGN(attr_len);
-	int next_offset = offsetof(struct sw_flow_actions, actions) +
+	int next_offset = offsetof(struct ovs_flow_actions, actions) +
 					(*sfa)->actions_len;
 
 	if (req_size <= (ksize(*sfa) - next_offset))
@@ -1127,7 +1127,8 @@  out:
 	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
 }
 
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+static int add_action(struct ovs_flow_actions **sfa, int attrtype,
+		      void *data, int len)
 {
 	struct nlattr *a;
 
@@ -1145,7 +1146,7 @@  static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, in
 	return 0;
 }
 
-static inline int add_nested_action_start(struct sw_flow_actions **sfa,
+static inline int add_nested_action_start(struct ovs_flow_actions **sfa,
 					  int attrtype)
 {
 	int used = (*sfa)->actions_len;
@@ -1158,7 +1159,7 @@  static inline int add_nested_action_start(struct sw_flow_actions **sfa,
 	return used;
 }
 
-static inline void add_nested_action_end(struct sw_flow_actions *sfa,
+static inline void add_nested_action_end(struct ovs_flow_actions *sfa,
 					 int st_offset)
 {
 	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
@@ -1169,7 +1170,7 @@  static inline void add_nested_action_end(struct sw_flow_actions *sfa,
 
 static int validate_and_copy_sample(const struct nlattr *attr,
 				    const struct sw_flow_key *key, int depth,
-				    struct sw_flow_actions **sfa)
+				    struct ovs_flow_actions **sfa)
 {
 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
 	const struct nlattr *probability, *actions;
@@ -1226,7 +1227,7 @@  static int validate_tp_port(const struct sw_flow_key *flow_key)
 	return -EINVAL;
 }
 
-void ovs_match_init(struct sw_flow_match *match,
+void ovs_match_init(struct ovs_flow_match *match,
 		    struct sw_flow_key *key,
 		    struct sw_flow_mask *mask)
 {
@@ -1243,9 +1244,9 @@  void ovs_match_init(struct sw_flow_match *match,
 }
 
 static int validate_and_copy_set_tun(const struct nlattr *attr,
-				     struct sw_flow_actions **sfa)
+				     struct ovs_flow_actions **sfa)
 {
-	struct sw_flow_match match;
+	struct ovs_flow_match match;
 	struct sw_flow_key key;
 	int err, start;
 
@@ -1267,7 +1268,7 @@  static int validate_and_copy_set_tun(const struct nlattr *attr,
 
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
-			struct sw_flow_actions **sfa,
+			struct ovs_flow_actions **sfa,
 			bool *set_tun)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -1381,7 +1382,7 @@  static int validate_userspace(const struct nlattr *attr)
 }
 
 static int copy_action(const struct nlattr *from,
-		       struct sw_flow_actions **sfa)
+		       struct ovs_flow_actions **sfa)
 {
 	int totlen = NLA_ALIGN(from->nla_len);
 	struct nlattr *to;
@@ -1397,7 +1398,7 @@  static int copy_action(const struct nlattr *from,
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
 			 int depth,
-			 struct sw_flow_actions **sfa)
+			 struct ovs_flow_actions **sfa)
 {
 	const struct nlattr *a;
 	int rem, err;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 4401510..296b126 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -37,24 +37,24 @@ 
 
 #include "flow.h"
 
-void ovs_match_init(struct sw_flow_match *match,
+void ovs_match_init(struct ovs_flow_match *match,
 		    struct sw_flow_key *key, struct sw_flow_mask *mask);
 
 int ovs_nla_put_flow(const struct sw_flow_key *,
 		     const struct sw_flow_key *, struct sk_buff *);
 int ovs_nla_get_flow_metadata(struct sw_flow *flow,
 			      const struct nlattr *attr);
-int ovs_nla_get_match(struct sw_flow_match *match,
+int ovs_nla_get_match(struct ovs_flow_match *match,
 		      const struct nlattr *,
 		      const struct nlattr *);
 
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key, int depth,
-			 struct sw_flow_actions **sfa);
+			 struct ovs_flow_actions **sfa);
 int ovs_nla_put_actions(const struct nlattr *attr,
 			int len, struct sk_buff *skb);
 
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
-void ovs_nla_free_flow_actions(struct sw_flow_actions *);
+struct ovs_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
+void ovs_nla_free_flow_actions(struct ovs_flow_actions *);
 
 #endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf2d853..e7d9a41 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -73,9 +73,9 @@  void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		*d++ = *s++ & *m++;
 }
 
-struct sw_flow *ovs_flow_alloc(void)
+struct ovs_flow *ovs_flow_alloc(void)
 {
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct flow_stats *stats;
 	int node;
 
@@ -84,7 +84,7 @@  struct sw_flow *ovs_flow_alloc(void)
 		return ERR_PTR(-ENOMEM);
 
 	flow->sf_acts = NULL;
-	flow->mask = NULL;
+	flow->flow.mask = NULL;
 	flow->stats_last_writer = NUMA_NO_NODE;
 
 	/* Initialize the default stat node. */
@@ -135,11 +135,11 @@  static struct flex_array *alloc_buckets(unsigned int n_buckets)
 	return buckets;
 }
 
-static void flow_free(struct sw_flow *flow)
+static void flow_free(struct ovs_flow *flow)
 {
 	int node;
 
-	kfree((struct sw_flow_actions __force *)flow->sf_acts);
+	kfree((struct ovs_flow_actions __force *)flow->sf_acts);
 	for_each_node(node)
 		if (flow->stats[node])
 			kmem_cache_free(flow_stats_cache,
@@ -149,12 +149,12 @@  static void flow_free(struct sw_flow *flow)
 
 static void rcu_free_flow_callback(struct rcu_head *rcu)
 {
-	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+	struct ovs_flow *flow = container_of(rcu, struct ovs_flow, rcu);
 
 	flow_free(flow);
 }
 
-void ovs_flow_free(struct sw_flow *flow, bool deferred)
+void ovs_flow_free(struct ovs_flow *flow, bool deferred)
 {
 	if (!flow)
 		return;
@@ -232,7 +232,7 @@  static void table_instance_destroy(struct table_instance *ti, bool deferred)
 		goto skip_flows;
 
 	for (i = 0; i < ti->n_buckets; i++) {
-		struct sw_flow *flow;
+		struct ovs_flow *flow;
 		struct hlist_head *head = flex_array_get(ti->buckets, i);
 		struct hlist_node *n;
 		int ver = ti->node_ver;
@@ -257,10 +257,10 @@  void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
 	table_instance_destroy(ti, deferred);
 }
 
-struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
-				       u32 *bucket, u32 *last)
+struct ovs_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
+					u32 *bucket, u32 *last)
 {
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct hlist_head *head;
 	int ver;
 	int i;
@@ -291,7 +291,8 @@  static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
 				(hash & (ti->n_buckets - 1)));
 }
 
-static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+static void table_instance_insert(struct table_instance *ti,
+				  struct ovs_flow *flow)
 {
 	struct hlist_head *head;
 
@@ -310,7 +311,7 @@  static void flow_table_copy_flows(struct table_instance *old,
 
 	/* Insert in new table. */
 	for (i = 0; i < old->n_buckets; i++) {
-		struct sw_flow *flow;
+		struct ovs_flow *flow;
 		struct hlist_head *head;
 
 		head = flex_array_get(old->buckets, i);
@@ -397,21 +398,21 @@  static bool flow_cmp_masked_key(const struct sw_flow *flow,
 	return cmp_key(&flow->key, key, key_start, key_end);
 }
 
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-			       struct sw_flow_match *match)
+bool ovs_flow_cmp_unmasked_key(const struct ovs_flow *flow,
+			       struct ovs_flow_match *match)
 {
 	struct sw_flow_key *key = match->key;
 	int key_start = flow_key_start(key);
 	int key_end = match->range.end;
 
-	return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+	return cmp_key(&flow->flow.unmasked_key, key, key_start, key_end);
 }
 
-static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
-					  const struct sw_flow_key *unmasked,
-					  struct sw_flow_mask *mask)
+static struct ovs_flow *masked_flow_lookup(struct table_instance *ti,
+					   const struct sw_flow_key *unmasked,
+					   struct sw_flow_mask *mask)
 {
-	struct sw_flow *flow;
+	struct ovs_flow *flow;
 	struct hlist_head *head;
 	int key_start = mask->range.start;
 	int key_end = mask->range.end;
@@ -422,50 +423,50 @@  static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 	hash = flow_hash(&masked_key, key_start, key_end);
 	head = find_bucket(ti, hash);
 	hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
-		if (flow->mask == mask && flow->hash == hash &&
-		    flow_cmp_masked_key(flow, &masked_key,
-					  key_start, key_end))
+		if (flow->flow.mask == mask && flow->hash == hash &&
+		    flow_cmp_masked_key(&flow->flow, &masked_key,
+					key_start, key_end))
 			return flow;
 	}
 	return NULL;
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
-				    const struct sw_flow_key *key,
-				    u32 *n_mask_hit)
+struct ovs_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+					   const struct sw_flow_key *key,
+					   u32 *n_mask_hit)
 {
 	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
-	struct sw_flow_mask *mask;
-	struct sw_flow *flow;
+	struct ovs_flow_mask *mask;
+	struct ovs_flow *flow;
 
 	*n_mask_hit = 0;
 	list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
 		(*n_mask_hit)++;
-		flow = masked_flow_lookup(ti, key, mask);
+		flow = masked_flow_lookup(ti, key, &mask->mask);
 		if (flow)  /* Found */
 			return flow;
 	}
 	return NULL;
 }
 
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
-				    const struct sw_flow_key *key)
+struct ovs_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
+				     const struct sw_flow_key *key)
 {
 	u32 __always_unused n_mask_hit;
 
 	return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
-					  struct sw_flow_match *match)
+struct ovs_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					   struct ovs_flow_match *match)
 {
 	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
-	struct sw_flow_mask *mask;
-	struct sw_flow *flow;
+	struct ovs_flow_mask *mask;
+	struct ovs_flow *flow;
 
 	/* Always called under ovs-mutex. */
 	list_for_each_entry(mask, &tbl->mask_list, list) {
-		flow = masked_flow_lookup(ti, match->key, mask);
+		flow = masked_flow_lookup(ti, match->key, &mask->mask);
 		if (flow && ovs_flow_cmp_unmasked_key(flow, match))  /* Found */
 			return flow;
 	}
@@ -474,7 +475,7 @@  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
 
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
-	struct sw_flow_mask *mask;
+	struct ovs_flow_mask *mask;
 	int num = 0;
 
 	list_for_each_entry(mask, &table->mask_list, list)
@@ -489,7 +490,7 @@  static struct table_instance *table_instance_expand(struct table_instance *ti)
 }
 
 /* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+static void flow_mask_remove(struct flow_table *tbl, struct ovs_flow_mask *mask)
 {
 	if (mask) {
 		/* ovs-lock is required to protect mask-refcount and
@@ -507,9 +508,12 @@  static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 }
 
 /* Must be called with OVS mutex held. */
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+void ovs_flow_tbl_remove(struct flow_table *table, struct ovs_flow *flow)
 {
 	struct table_instance *ti = ovsl_dereference(table->ti);
+	struct ovs_flow_mask *mask = container_of(flow->flow.mask,
+						  struct ovs_flow_mask,
+						  mask);
 
 	BUG_ON(table->count == 0);
 	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
@@ -518,12 +522,12 @@  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
 	 * accessible as long as the RCU read lock is held.
 	 */
-	flow_mask_remove(table, flow->mask);
+	flow_mask_remove(table, mask);
 }
 
-static struct sw_flow_mask *mask_alloc(void)
+static struct ovs_flow_mask *mask_alloc(void)
 {
-	struct sw_flow_mask *mask;
+	struct ovs_flow_mask *mask;
 
 	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
 	if (mask)
@@ -543,15 +547,16 @@  static bool mask_equal(const struct sw_flow_mask *a,
 		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
 }
 
-static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
-					   const struct sw_flow_mask *mask)
+static struct ovs_flow_mask *flow_mask_find(const struct flow_table *tbl,
+					    const struct sw_flow_mask *mask)
 {
 	struct list_head *ml;
 
 	list_for_each(ml, &tbl->mask_list) {
-		struct sw_flow_mask *m;
-		m = container_of(ml, struct sw_flow_mask, list);
-		if (mask_equal(mask, m))
+		struct ovs_flow_mask *m;
+
+		m = container_of(ml, struct ovs_flow_mask, list);
+		if (mask_equal(mask, &m->mask))
 			return m;
 	}
 
@@ -559,30 +564,31 @@  static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
 }
 
 /* Add 'mask' into the mask list, if it is not already there. */
-static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
+static int flow_mask_insert(struct flow_table *tbl, struct ovs_flow *flow,
 			    struct sw_flow_mask *new)
 {
-	struct sw_flow_mask *mask;
+	struct ovs_flow_mask *mask;
+
 	mask = flow_mask_find(tbl, new);
 	if (!mask) {
 		/* Allocate a new mask if none exsits. */
 		mask = mask_alloc();
 		if (!mask)
 			return -ENOMEM;
-		mask->key = new->key;
-		mask->range = new->range;
+		mask->mask.key = new->key;
+		mask->mask.range = new->range;
 		list_add_rcu(&mask->list, &tbl->mask_list);
 	} else {
 		BUG_ON(!mask->ref_count);
 		mask->ref_count++;
 	}
 
-	flow->mask = mask;
+	flow->flow.mask = &mask->mask;
 	return 0;
 }
 
 /* Must be called with OVS mutex held. */
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+int ovs_flow_tbl_insert(struct flow_table *table, struct ovs_flow *flow,
 			struct sw_flow_mask *mask)
 {
 	struct table_instance *new_ti = NULL;
@@ -593,8 +599,8 @@  int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 	if (err)
 		return err;
 
-	flow->hash = flow_hash(&flow->key, flow->mask->range.start,
-			flow->mask->range.end);
+	flow->hash = flow_hash(&flow->flow.key, flow->flow.mask->range.start,
+			       flow->flow.mask->range.end);
 	ti = ovsl_dereference(table->ti);
 	table_instance_insert(ti, flow);
 	table->count++;
@@ -620,7 +626,7 @@  int ovs_flow_init(void)
 	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
 
-	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+	flow_cache = kmem_cache_create("ovs_flow", sizeof(struct ovs_flow)
 				       + (num_possible_nodes()
 					  * sizeof(struct flow_stats *)),
 				       0, 0, NULL);
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 5918bff..d57d6b5 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -57,29 +57,29 @@  extern struct kmem_cache *flow_stats_cache;
 int ovs_flow_init(void);
 void ovs_flow_exit(void);
 
-struct sw_flow *ovs_flow_alloc(void);
-void ovs_flow_free(struct sw_flow *, bool deferred);
+struct ovs_flow *ovs_flow_alloc(void);
+void ovs_flow_free(struct ovs_flow *, bool deferred);
 
 int ovs_flow_tbl_init(struct flow_table *);
 int ovs_flow_tbl_count(struct flow_table *table);
 void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
 int ovs_flow_tbl_flush(struct flow_table *flow_table);
 
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+int ovs_flow_tbl_insert(struct flow_table *table, struct ovs_flow *flow,
 			struct sw_flow_mask *mask);
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+void ovs_flow_tbl_remove(struct flow_table *table, struct ovs_flow *flow);
 int  ovs_flow_tbl_num_masks(const struct flow_table *table);
-struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
-				       u32 *bucket, u32 *idx);
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
-				    const struct sw_flow_key *,
-				    u32 *n_mask_hit);
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
-				    const struct sw_flow_key *);
-struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
-					  struct sw_flow_match *match);
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-			       struct sw_flow_match *match);
+struct ovs_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
+					u32 *bucket, u32 *idx);
+struct ovs_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
+					   const struct sw_flow_key *,
+					   u32 *n_mask_hit);
+struct ovs_flow *ovs_flow_tbl_lookup(struct flow_table *,
+				     const struct sw_flow_key *);
+struct ovs_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					   struct ovs_flow_match *match);
+bool ovs_flow_cmp_unmasked_key(const struct ovs_flow *flow,
+			       struct ovs_flow_match *match);
 
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		       const struct sw_flow_mask *mask);
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index f49148a..fda79eb 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -63,7 +63,7 @@  static __be16 filter_tnl_flags(__be16 flags)
 static struct sk_buff *__build_header(struct sk_buff *skb,
 				      int tunnel_hlen)
 {
-	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
+	const struct sw_flow_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
 	struct tnl_ptk_info tpi;
 
 	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
@@ -92,7 +92,7 @@  static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
 static int gre_rcv(struct sk_buff *skb,
 		   const struct tnl_ptk_info *tpi)
 {
-	struct ovs_key_ipv4_tunnel tun_key;
+	struct sw_flow_key_ipv4_tunnel tun_key;
 	struct ovs_net *ovs_net;
 	struct vport *vport;
 	__be64 key;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d8b7e24..b7edf47 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -58,7 +58,7 @@  static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 /* Called with rcu_read_lock and BH disabled. */
 static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 {
-	struct ovs_key_ipv4_tunnel tun_key;
+	struct sw_flow_key_ipv4_tunnel tun_key;
 	struct vport *vport = vs->data;
 	struct iphdr *iph;
 	__be64 key;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6d8f2ec..7df5234 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -438,7 +438,7 @@  u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
  * skb->data should point to the Ethernet header.
  */
 void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
-		       struct ovs_key_ipv4_tunnel *tun_key)
+		       struct sw_flow_key_ipv4_tunnel *tun_key)
 {
 	struct pcpu_sw_netstats *stats;
 
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 35f89d8..8409e06 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -210,7 +210,7 @@  static inline struct vport *vport_from_priv(void *priv)
 }
 
 void ovs_vport_receive(struct vport *, struct sk_buff *,
-		       struct ovs_key_ipv4_tunnel *);
+		       struct sw_flow_key_ipv4_tunnel *);
 
 /* List of statically compiled vport implementations.  Don't forget to also
  * add yours to the list at the top of vport.c. */