diff mbox series

[ovs-dev,net-next,v2] openvswitch: add TTL decrement action

Message ID 20191217155102.46039-1-mcroce@redhat.com
State Awaiting Upstream
Headers show
Series [ovs-dev,net-next,v2] openvswitch: add TTL decrement action | expand

Commit Message

Matteo Croce Dec. 17, 2019, 3:51 p.m. UTC
New action to decrement TTL instead of setting it to a fixed value.
This action will decrement the TTL and, in case of expired TTL, drop it
or execute an action passed via a nested attribute.
The default TTL expired action is to drop the packet.

Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.

Tested with a corresponding change in the userspace:

    # ovs-dpctl dump-flows
    in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
    in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
    in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
    in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1

    # ping -c1 192.168.0.2 -t 42
    IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
        192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
    # ping -c1 192.168.0.2 -t 120
    IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
        192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
    # ping -c1 192.168.0.2 -t 1
    #

Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
 include/uapi/linux/openvswitch.h |  22 +++++++
 net/openvswitch/actions.c        |  71 +++++++++++++++++++++
 net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
 3 files changed, 198 insertions(+)

Comments

Nikolay Aleksandrov Dec. 17, 2019, 4:30 p.m. UTC | #1
On 17/12/2019 17:51, Matteo Croce wrote:
> New action to decrement TTL instead of setting it to a fixed value.
> This action will decrement the TTL and, in case of expired TTL, drop it
> or execute an action passed via a nested attribute.
> The default TTL expired action is to drop the packet.
> 
> Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> 
> Tested with a corresponding change in the userspace:
> 
>     # ovs-dpctl dump-flows
>     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
>     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
>     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
>     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> 
>     # ping -c1 192.168.0.2 -t 42
>     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
>         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
>     # ping -c1 192.168.0.2 -t 120
>     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
>         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
>     # ping -c1 192.168.0.2 -t 1
>     #
> 
> Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Matteo Croce <mcroce@redhat.com>
> ---
>  include/uapi/linux/openvswitch.h |  22 +++++++
>  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
>  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
>  3 files changed, 198 insertions(+)
> 

Hi Matteo,

[snip]
> +}
> +
>  /* When 'last' is true, sample() should always consume the 'skb'.
>   * Otherwise, sample() should keep 'skb' intact regardless what
>   * actions are executed within sample().
> @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
>  			     nla_len(actions), last, clone_flow_key);
>  }
>  
> +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> +	int err;
> +
> +	if (skb->protocol == htons(ETH_P_IPV6)) {
> +		struct ipv6hdr *nh = ipv6_hdr(skb);
> +
> +		err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +					  sizeof(*nh));

skb_ensure_writable() calls pskb_may_pull(), which may reallocate, so nh might become invalid.
The IPv4 version below seems OK, as the pointer is reloaded after the call.

One question, as I don't know OVS that well: can this action be called only with
skb->protocol == ETH_P_IP/IPV6? I.e., are we sure that if it's not v6, then it must be v4?


Thanks,
 Nik

> +		if (unlikely(err))
> +			return err;
> +
> +		if (nh->hop_limit <= 1)
> +			return -EHOSTUNREACH;
> +
> +		key->ip.ttl = --nh->hop_limit;
> +	} else {
> +		struct iphdr *nh = ip_hdr(skb);
> +		u8 old_ttl;
> +
> +		err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +					  sizeof(*nh));
> +		if (unlikely(err))
> +			return err;
> +
> +		nh = ip_hdr(skb);
> +		if (nh->ttl <= 1)
> +			return -EHOSTUNREACH;
> +
> +		old_ttl = nh->ttl--;
> +		csum_replace2(&nh->check, htons(old_ttl << 8),
> +			      htons(nh->ttl << 8));
> +		key->ip.ttl = nh->ttl;
> +	}
> +
> +	return 0;
> +}
> +
>  /* Execute a list of actions against 'skb'. */
>  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>  			      struct sw_flow_key *key,
> @@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>  
>  			break;
>  		}
> +
> +		case OVS_ACTION_ATTR_DEC_TTL:
> +			err = execute_dec_ttl(skb, key);
> +			if (err == -EHOSTUNREACH) {
> +				err = dec_ttl(dp, skb, key, a, true);
> +				return err;
> +			}
> +			break;
>  		}
>  
>  		if (unlikely(err)) {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 65c2e3458ff5..a9eea2ffb8b0 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
>  		case OVS_ACTION_ATTR_RECIRC:
>  		case OVS_ACTION_ATTR_TRUNC:
>  		case OVS_ACTION_ATTR_USERSPACE:
> +		case OVS_ACTION_ATTR_DEC_TTL:
>  			break;
>  
>  		case OVS_ACTION_ATTR_CT:
> @@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  	return 0;
>  }
>  
> +static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
> +				     const struct sw_flow_key *key,
> +				     struct sw_flow_actions **sfa,
> +				     __be16 eth_type, __be16 vlan_tci,
> +				     u32 mpls_label_count, bool log)
> +{
> +	struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
> +	const struct nlattr *action_type, *action;
> +	struct nlattr *a;
> +	int rem, start, err;
> +	struct dec_ttl_arg arg;
> +
> +	nla_for_each_nested(a, attr, rem) {
> +		int type = nla_type(a);
> +
> +		if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
> +			return -EINVAL;
> +
> +		attrs[type] = a;
> +	}
> +	if (rem)
> +		return -EINVAL;
> +
> +	action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
> +	if (!action_type || nla_len(action_type) != sizeof(u32))
> +		return -EINVAL;
> +
> +	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
> +	if (start < 0)
> +		return start;
> +
> +	arg.action_type = nla_get_u32(action_type);
> +	err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
> +				 &arg, sizeof(arg), log);
> +	if (err)
> +		return err;
> +
> +	if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> +		action = attrs[OVS_DEC_TTL_ATTR_ACTION];
> +		if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
> +			return -EINVAL;
> +
> +		err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
> +					     vlan_tci, mpls_label_count, log);
> +		if (err)
> +			return err;
> +	}
> +
> +	add_nested_action_end(*sfa, start);
> +
> +	return 0;
> +}
> +
>  static int validate_and_copy_clone(struct net *net,
>  				   const struct nlattr *attr,
>  				   const struct sw_flow_key *key,
> @@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  			[OVS_ACTION_ATTR_METER] = sizeof(u32),
>  			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
>  			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> +			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
>  		};
>  		const struct ovs_action_push_vlan *vlan;
>  		int type = nla_type(a);
> @@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  			break;
>  		}
>  
> +		case OVS_ACTION_ATTR_DEC_TTL:
> +			err = validate_and_copy_dec_ttl(net, a, key, sfa,
> +							eth_type, vlan_tci,
> +							mpls_label_count, log);
> +			if (err)
> +				return err;
> +			skip_copy = true;
> +			break;
> +
>  		default:
>  			OVS_NLERR(log, "Unknown Action type %d", type);
>  			return -EINVAL;
> @@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
>  	return err;
>  }
>  
> +static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
> +{
> +	struct nlattr *start, *ac_start = NULL, *dec_ttl;
> +	int err = 0, rem = nla_len(att);
> +	const struct dec_ttl_arg *arg;
> +	struct nlattr *actions;
> +
> +	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
> +	if (!start)
> +		return -EMSGSIZE;
> +
> +	dec_ttl = nla_data(att);
> +	arg = nla_data(dec_ttl);
> +	actions = nla_next(dec_ttl, &rem);
> +
> +	if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
> +		nla_nest_cancel(skb, start);
> +		return -EMSGSIZE;
> +	}
> +
> +	if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> +		ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
> +		if (!ac_start) {
> +			nla_nest_cancel(skb, ac_start);
> +			nla_nest_cancel(skb, start);
> +			return -EMSGSIZE;
> +		}
> +		err = ovs_nla_put_actions(actions, rem, skb);
> +		nla_nest_end(skb, ac_start);
> +	}
> +	nla_nest_end(skb, start);
> +
> +	return err;
> +}
> +
>  static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
>  {
>  	const struct nlattr *ovs_key = nla_data(a);
> @@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
>  				return err;
>  			break;
>  
> +		case OVS_ACTION_ATTR_DEC_TTL:
> +			err = dec_ttl_action_to_attr(a, skb);
> +			if (err)
> +				return err;
> +			break;
> +
>  		default:
>  			if (nla_put(skb, type, nla_len(a), nla_data(a)))
>  				return -EMSGSIZE;
>
Pravin Shelar Dec. 18, 2019, 3:06 a.m. UTC | #2
On Tue, Dec 17, 2019 at 7:51 AM Matteo Croce <mcroce@redhat.com> wrote:
>
> New action to decrement TTL instead of setting it to a fixed value.
> This action will decrement the TTL and, in case of expired TTL, drop it
> or execute an action passed via a nested attribute.
> The default TTL expired action is to drop the packet.
>
> Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
>
> Tested with a corresponding change in the userspace:
>
>     # ovs-dpctl dump-flows
>     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
>     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
>     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
>     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
>
>     # ping -c1 192.168.0.2 -t 42
>     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
>         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
>     # ping -c1 192.168.0.2 -t 120
>     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
>         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
>     # ping -c1 192.168.0.2 -t 1
>     #
>
> Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> Signed-off-by: Matteo Croce <mcroce@redhat.com>
> ---
>  include/uapi/linux/openvswitch.h |  22 +++++++
>  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
>  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
>  3 files changed, 198 insertions(+)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index a87b44cd5590..b6684bc04883 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -927,6 +927,7 @@ enum ovs_action_attr {
>         OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
>         OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
>         OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> +       OVS_ACTION_ATTR_DEC_TTL,       /* Nested OVS_DEC_TTL_ATTR_*. */
>
>         __OVS_ACTION_ATTR_MAX,        /* Nothing past this will be accepted
>                                        * from userspace. */
> @@ -939,6 +940,23 @@ enum ovs_action_attr {
>  };
>
>  #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
> +enum ovs_dec_ttl_attr {
> +       OVS_DEC_TTL_ATTR_UNSPEC,
> +       OVS_DEC_TTL_ATTR_ACTION_TYPE,    /* Action Type u32 */
> +       OVS_DEC_TTL_ATTR_ACTION,         /* nested action */
> +       __OVS_DEC_TTL_ATTR_MAX,
> +#ifdef __KERNEL__
> +       OVS_DEC_TTL_ATTR_ARG          /* struct sample_arg  */
> +#endif
> +};
> +

I do not see the need for the type or OVS_DEC_TTL_ACTION_DROP; if there are no
nested actions, the datapath can drop the packet.

> +#ifdef __KERNEL__
> +struct dec_ttl_arg {
> +       u32 action_type;            /* dec_ttl action type.*/
> +};
> +#endif
> +
> +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
>
>  /* Meters. */
>  #define OVS_METER_FAMILY  "ovs_meter"
> @@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
>         __OVS_CT_LIMIT_ATTR_MAX
>  };
>
> +enum ovs_dec_ttl_action {            /*Actions supported by dec_ttl */
> +       OVS_DEC_TTL_ACTION_DROP,
> +       OVS_DEC_TTL_ACTION_USER_SPACE
> +};
>  #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
>
>  #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 4c8395462303..5329668732b1 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
>         return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
>  }
>
> +static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
> +                  struct sw_flow_key *fk, const struct nlattr *attr, bool last)
> +{
> +       struct nlattr *actions;
> +       struct nlattr *dec_ttl_arg;
> +       int rem = nla_len(attr);
> +       const struct dec_ttl_arg *arg;
> +
> +       /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
> +       dec_ttl_arg = nla_data(attr);
> +       arg = nla_data(dec_ttl_arg);
> +       actions = nla_next(dec_ttl_arg, &rem);
> +
> +       switch (arg->action_type) {
> +       case OVS_DEC_TTL_ACTION_DROP:
> +               consume_skb(skb);
> +               break;
> +
> +       case OVS_DEC_TTL_ACTION_USER_SPACE:
> +               return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
> +       }
> +
> +       return 0;
> +}
> +
>  /* When 'last' is true, sample() should always consume the 'skb'.
>   * Otherwise, sample() should keep 'skb' intact regardless what
>   * actions are executed within sample().
> @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
>                              nla_len(actions), last, clone_flow_key);
>  }
>
> +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> +       int err;
> +
> +       if (skb->protocol == htons(ETH_P_IPV6)) {
> +               struct ipv6hdr *nh = ipv6_hdr(skb);
> +
> +               err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +                                         sizeof(*nh));
There is no need to initialize 'nh'; just use 'struct ipv6hdr' to get the size.
> +               if (unlikely(err))
> +                       return err;
> +
> +               if (nh->hop_limit <= 1)
> +                       return -EHOSTUNREACH;
> +
> +               key->ip.ttl = --nh->hop_limit;
> +       } else {
> +               struct iphdr *nh = ip_hdr(skb);
> +               u8 old_ttl;
> +
> +               err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +                                         sizeof(*nh));
same as above.
> +               if (unlikely(err))
> +                       return err;
> +
> +               nh = ip_hdr(skb);
> +               if (nh->ttl <= 1)
> +                       return -EHOSTUNREACH;
> +
> +               old_ttl = nh->ttl--;
> +               csum_replace2(&nh->check, htons(old_ttl << 8),
> +                             htons(nh->ttl << 8));
> +               key->ip.ttl = nh->ttl;
> +       }
> +
> +       return 0;
> +}
> +
>  /* Execute a list of actions against 'skb'. */
>  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>                               struct sw_flow_key *key,
> @@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>
>                         break;
>                 }
> +
> +               case OVS_ACTION_ATTR_DEC_TTL:
> +                       err = execute_dec_ttl(skb, key);
> +                       if (err == -EHOSTUNREACH) {
Can you use unlikely() here?

> +                               err = dec_ttl(dp, skb, key, a, true);
> +                               return err;
> +                       }
> +                       break;
>                 }
>
>                 if (unlikely(err)) {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 65c2e3458ff5..a9eea2ffb8b0 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
>                 case OVS_ACTION_ATTR_RECIRC:
>                 case OVS_ACTION_ATTR_TRUNC:
>                 case OVS_ACTION_ATTR_USERSPACE:
> +               case OVS_ACTION_ATTR_DEC_TTL:
>                         break;
>
>                 case OVS_ACTION_ATTR_CT:
> @@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>         return 0;
>  }
>
> +static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
> +                                    const struct sw_flow_key *key,
> +                                    struct sw_flow_actions **sfa,
> +                                    __be16 eth_type, __be16 vlan_tci,
> +                                    u32 mpls_label_count, bool log)
> +{
> +       struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
> +       const struct nlattr *action_type, *action;
> +       struct nlattr *a;
> +       int rem, start, err;
> +       struct dec_ttl_arg arg;
> +
Here we need to validate if eth_type is IPv4 or IPv6.



> +       nla_for_each_nested(a, attr, rem) {
> +               int type = nla_type(a);
> +
> +               if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
> +                       return -EINVAL;
> +
> +               attrs[type] = a;
> +       }
> +       if (rem)
> +               return -EINVAL;
> +
> +       action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
> +       if (!action_type || nla_len(action_type) != sizeof(u32))
> +               return -EINVAL;
> +
> +       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
> +       if (start < 0)
> +               return start;
> +
> +       arg.action_type = nla_get_u32(action_type);
> +       err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
> +                                &arg, sizeof(arg), log);
> +       if (err)
> +               return err;
> +
> +       if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> +               action = attrs[OVS_DEC_TTL_ATTR_ACTION];
> +               if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
> +                       return -EINVAL;
> +
> +               err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
> +                                            vlan_tci, mpls_label_count, log);
> +               if (err)
> +                       return err;
> +       }
> +
> +       add_nested_action_end(*sfa, start);
> +
> +       return 0;
> +}
> +
>  static int validate_and_copy_clone(struct net *net,
>                                    const struct nlattr *attr,
>                                    const struct sw_flow_key *key,
> @@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>                         [OVS_ACTION_ATTR_METER] = sizeof(u32),
>                         [OVS_ACTION_ATTR_CLONE] = (u32)-1,
>                         [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> +                       [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
>                 };
>                 const struct ovs_action_push_vlan *vlan;
>                 int type = nla_type(a);
> @@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>                         break;
>                 }
>
> +               case OVS_ACTION_ATTR_DEC_TTL:
> +                       err = validate_and_copy_dec_ttl(net, a, key, sfa,
> +                                                       eth_type, vlan_tci,
> +                                                       mpls_label_count, log);
> +                       if (err)
> +                               return err;
> +                       skip_copy = true;
> +                       break;
> +
>                 default:
>                         OVS_NLERR(log, "Unknown Action type %d", type);
>                         return -EINVAL;
> @@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
>         return err;
>  }
>
> +static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
> +{
> +       struct nlattr *start, *ac_start = NULL, *dec_ttl;
> +       int err = 0, rem = nla_len(att);
> +       const struct dec_ttl_arg *arg;
> +       struct nlattr *actions;
> +
> +       start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
> +       if (!start)
> +               return -EMSGSIZE;
> +
> +       dec_ttl = nla_data(att);
> +       arg = nla_data(dec_ttl);
> +       actions = nla_next(dec_ttl, &rem);
> +
> +       if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
> +               nla_nest_cancel(skb, start);
> +               return -EMSGSIZE;
> +       }
> +
> +       if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> +               ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
> +               if (!ac_start) {
> +                       nla_nest_cancel(skb, ac_start);
> +                       nla_nest_cancel(skb, start);
> +                       return -EMSGSIZE;
> +               }
> +               err = ovs_nla_put_actions(actions, rem, skb);
> +               nla_nest_end(skb, ac_start);
> +       }
> +       nla_nest_end(skb, start);
> +
> +       return err;
> +}
> +
>  static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
>  {
>         const struct nlattr *ovs_key = nla_data(a);
> @@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
>                                 return err;
>                         break;
>
> +               case OVS_ACTION_ATTR_DEC_TTL:
> +                       err = dec_ttl_action_to_attr(a, skb);
> +                       if (err)
> +                               return err;
> +                       break;
> +
>                 default:
>                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
>                                 return -EMSGSIZE;
> --
> 2.23.0
>
Matteo Croce Dec. 19, 2019, 4:36 p.m. UTC | #3
On Wed, Dec 18, 2019 at 4:06 AM Pravin Shelar <pshelar@ovn.org> wrote:
>
> On Tue, Dec 17, 2019 at 7:51 AM Matteo Croce <mcroce@redhat.com> wrote:
> >
> > New action to decrement TTL instead of setting it to a fixed value.
> > This action will decrement the TTL and, in case of expired TTL, drop it
> > or execute an action passed via a nested attribute.
> > The default TTL expired action is to drop the packet.
> >
> > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> >
> > Tested with a corresponding change in the userspace:
> >
> >     # ovs-dpctl dump-flows
> >     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> >     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> >     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> >     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> >
> >     # ping -c1 192.168.0.2 -t 42
> >     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> >     # ping -c1 192.168.0.2 -t 120
> >     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> >     # ping -c1 192.168.0.2 -t 1
> >     #
> >
> > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > ---
> >  include/uapi/linux/openvswitch.h |  22 +++++++
> >  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
> >  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
> >  3 files changed, 198 insertions(+)
> >
> > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> > index a87b44cd5590..b6684bc04883 100644
> > --- a/include/uapi/linux/openvswitch.h
> > +++ b/include/uapi/linux/openvswitch.h
> > @@ -927,6 +927,7 @@ enum ovs_action_attr {
> >         OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
> >         OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
> >         OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> > +       OVS_ACTION_ATTR_DEC_TTL,       /* Nested OVS_DEC_TTL_ATTR_*. */
> >
> >         __OVS_ACTION_ATTR_MAX,        /* Nothing past this will be accepted
> >                                        * from userspace. */
> > @@ -939,6 +940,23 @@ enum ovs_action_attr {
> >  };
> >
> >  #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
> > +enum ovs_dec_ttl_attr {
> > +       OVS_DEC_TTL_ATTR_UNSPEC,
> > +       OVS_DEC_TTL_ATTR_ACTION_TYPE,    /* Action Type u32 */
> > +       OVS_DEC_TTL_ATTR_ACTION,         /* nested action */
> > +       __OVS_DEC_TTL_ATTR_MAX,
> > +#ifdef __KERNEL__
> > +       OVS_DEC_TTL_ATTR_ARG          /* struct sample_arg  */
> > +#endif
> > +};
> > +
>
> I do not see need for type or OVS_DEC_TTL_ACTION_DROP, if there are no
> nested action the datapath can drop the packet.
>
> > +#ifdef __KERNEL__
> > +struct dec_ttl_arg {
> > +       u32 action_type;            /* dec_ttl action type.*/
> > +};
> > +#endif
> > +
> > +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
> >
> >  /* Meters. */
> >  #define OVS_METER_FAMILY  "ovs_meter"
> > @@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
> >         __OVS_CT_LIMIT_ATTR_MAX
> >  };
> >
> > +enum ovs_dec_ttl_action {            /*Actions supported by dec_ttl */
> > +       OVS_DEC_TTL_ACTION_DROP,
> > +       OVS_DEC_TTL_ACTION_USER_SPACE
> > +};
> >  #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
> >
> >  #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
> > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > index 4c8395462303..5329668732b1 100644
> > --- a/net/openvswitch/actions.c
> > +++ b/net/openvswitch/actions.c
> > @@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
> >         return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
> >  }
> >
> > +static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
> > +                  struct sw_flow_key *fk, const struct nlattr *attr, bool last)
> > +{
> > +       struct nlattr *actions;
> > +       struct nlattr *dec_ttl_arg;
> > +       int rem = nla_len(attr);
> > +       const struct dec_ttl_arg *arg;
> > +
> > +       /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
> > +       dec_ttl_arg = nla_data(attr);
> > +       arg = nla_data(dec_ttl_arg);
> > +       actions = nla_next(dec_ttl_arg, &rem);
> > +
> > +       switch (arg->action_type) {
> > +       case OVS_DEC_TTL_ACTION_DROP:
> > +               consume_skb(skb);
> > +               break;
> > +
> > +       case OVS_DEC_TTL_ACTION_USER_SPACE:
> > +               return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  /* When 'last' is true, sample() should always consume the 'skb'.
> >   * Otherwise, sample() should keep 'skb' intact regardless what
> >   * actions are executed within sample().
> > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> >                              nla_len(actions), last, clone_flow_key);
> >  }
> >
> > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > +{
> > +       int err;
> > +
> > +       if (skb->protocol == htons(ETH_P_IPV6)) {
> > +               struct ipv6hdr *nh = ipv6_hdr(skb);
> > +
> > +               err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > +                                         sizeof(*nh));
> There is no need to initialize 'nh', just use 'struct ipv6hdr' to get the size.

But I have to set it later to have nh->hop_limit.
Do you mean to assign it before the skb_ensure_writable check?
What is the difference between sizeof(*nh) and sizeof(struct ipv6hdr)? The former will
also keep working after a refactor.

> > +               if (unlikely(err))
> > +                       return err;
> > +
> > +               if (nh->hop_limit <= 1)
> > +                       return -EHOSTUNREACH;
> > +
> > +               key->ip.ttl = --nh->hop_limit;
> > +       } else {
> > +               struct iphdr *nh = ip_hdr(skb);
> > +               u8 old_ttl;
> > +
> > +               err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > +                                         sizeof(*nh));
> same as above.
> > +               if (unlikely(err))
> > +                       return err;
> > +
> > +               nh = ip_hdr(skb);
> > +               if (nh->ttl <= 1)
> > +                       return -EHOSTUNREACH;
> > +
> > +               old_ttl = nh->ttl--;
> > +               csum_replace2(&nh->check, htons(old_ttl << 8),
> > +                             htons(nh->ttl << 8));
> > +               key->ip.ttl = nh->ttl;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  /* Execute a list of actions against 'skb'. */
> >  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> >                               struct sw_flow_key *key,
> > @@ -1347,6 +1410,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> >
> >                         break;
> >                 }
> > +
> > +               case OVS_ACTION_ATTR_DEC_TTL:
> > +                       err = execute_dec_ttl(skb, key);
> > +                       if (err == -EHOSTUNREACH) {
> Can you use unlikely().
>

sure

> > +                               err = dec_ttl(dp, skb, key, a, true);
> > +                               return err;
> > +                       }
> > +                       break;
> >                 }
> >
> >                 if (unlikely(err)) {
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index 65c2e3458ff5..a9eea2ffb8b0 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> > @@ -61,6 +61,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> >                 case OVS_ACTION_ATTR_RECIRC:
> >                 case OVS_ACTION_ATTR_TRUNC:
> >                 case OVS_ACTION_ATTR_USERSPACE:
> > +               case OVS_ACTION_ATTR_DEC_TTL:
> >                         break;
> >
> >                 case OVS_ACTION_ATTR_CT:
> > @@ -2494,6 +2495,59 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
> >         return 0;
> >  }
> >
> > +static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
> > +                                    const struct sw_flow_key *key,
> > +                                    struct sw_flow_actions **sfa,
> > +                                    __be16 eth_type, __be16 vlan_tci,
> > +                                    u32 mpls_label_count, bool log)
> > +{
> > +       struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
> > +       const struct nlattr *action_type, *action;
> > +       struct nlattr *a;
> > +       int rem, start, err;
> > +       struct dec_ttl_arg arg;
> > +
> Here we need to validate if eth_type is IPv4 or IPv6.
>
>

Check for ETH_P_IP or ETH_P_IPV6, and return -EINVAL otherwise?


>
> > +       nla_for_each_nested(a, attr, rem) {
> > +               int type = nla_type(a);
> > +
> > +               if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
> > +                       return -EINVAL;
> > +
> > +               attrs[type] = a;
> > +       }
> > +       if (rem)
> > +               return -EINVAL;
> > +
> > +       action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
> > +       if (!action_type || nla_len(action_type) != sizeof(u32))
> > +               return -EINVAL;
> > +
> > +       start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
> > +       if (start < 0)
> > +               return start;
> > +
> > +       arg.action_type = nla_get_u32(action_type);
> > +       err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
> > +                                &arg, sizeof(arg), log);
> > +       if (err)
> > +               return err;
> > +
> > +       if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> > +               action = attrs[OVS_DEC_TTL_ATTR_ACTION];
> > +               if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
> > +                       return -EINVAL;
> > +
> > +               err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
> > +                                            vlan_tci, mpls_label_count, log);
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> > +       add_nested_action_end(*sfa, start);
> > +
> > +       return 0;
> > +}
> > +
> >  static int validate_and_copy_clone(struct net *net,
> >                                    const struct nlattr *attr,
> >                                    const struct sw_flow_key *key,
> > @@ -3005,6 +3059,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >                         [OVS_ACTION_ATTR_METER] = sizeof(u32),
> >                         [OVS_ACTION_ATTR_CLONE] = (u32)-1,
> >                         [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> > +                       [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
> >                 };
> >                 const struct ovs_action_push_vlan *vlan;
> >                 int type = nla_type(a);
> > @@ -3233,6 +3288,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >                         break;
> >                 }
> >
> > +               case OVS_ACTION_ATTR_DEC_TTL:
> > +                       err = validate_and_copy_dec_ttl(net, a, key, sfa,
> > +                                                       eth_type, vlan_tci,
> > +                                                       mpls_label_count, log);
> > +                       if (err)
> > +                               return err;
> > +                       skip_copy = true;
> > +                       break;
> > +
> >                 default:
> >                         OVS_NLERR(log, "Unknown Action type %d", type);
> >                         return -EINVAL;
> > @@ -3404,6 +3468,41 @@ static int check_pkt_len_action_to_attr(const struct nlattr *attr,
> >         return err;
> >  }
> >
> > +static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
> > +{
> > +       struct nlattr *start, *ac_start = NULL, *dec_ttl;
> > +       int err = 0, rem = nla_len(att);
> > +       const struct dec_ttl_arg *arg;
> > +       struct nlattr *actions;
> > +
> > +       start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
> > +       if (!start)
> > +               return -EMSGSIZE;
> > +
> > +       dec_ttl = nla_data(att);
> > +       arg = nla_data(dec_ttl);
> > +       actions = nla_next(dec_ttl, &rem);
> > +
> > +       if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type)) {
> > +               nla_nest_cancel(skb, start);
> > +               return -EMSGSIZE;
> > +       }
> > +
> > +       if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
> > +               ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
> > +               if (!ac_start) {
> > +                       nla_nest_cancel(skb, ac_start);
> > +                       nla_nest_cancel(skb, start);
> > +                       return -EMSGSIZE;
> > +               }
> > +               err = ovs_nla_put_actions(actions, rem, skb);
> > +               nla_nest_end(skb, ac_start);
> > +       }
> > +       nla_nest_end(skb, start);
> > +
> > +       return err;
> > +}
> > +
> >  static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
> >  {
> >         const struct nlattr *ovs_key = nla_data(a);
> > @@ -3504,6 +3603,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
> >                                 return err;
> >                         break;
> >
> > +               case OVS_ACTION_ATTR_DEC_TTL:
> > +                       err = dec_ttl_action_to_attr(a, skb);
> > +                       if (err)
> > +                               return err;
> > +                       break;
> > +
> >                 default:
> >                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
> >                                 return -EMSGSIZE;
> > --
> > 2.23.0
> >
>
Pravin Shelar Dec. 20, 2019, 1:05 a.m. UTC | #4
On Thu, Dec 19, 2019 at 8:36 AM Matteo Croce <mcroce@redhat.com> wrote:
>
> On Wed, Dec 18, 2019 at 4:06 AM Pravin Shelar <pshelar@ovn.org> wrote:
> >
> > On Tue, Dec 17, 2019 at 7:51 AM Matteo Croce <mcroce@redhat.com> wrote:
> > >
> > > New action to decrement TTL instead of setting it to a fixed value.
> > > This action will decrement the TTL and, in case of expired TTL, drop it
> > > or execute an action passed via a nested attribute.
> > > The default TTL expired action is to drop the packet.
> > >
> > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> > >
> > > Tested with a corresponding change in the userspace:
> > >
> > >     # ovs-dpctl dump-flows
> > >     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > >     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > >     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> > >     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> > >
> > >     # ping -c1 192.168.0.2 -t 42
> > >     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> > >     # ping -c1 192.168.0.2 -t 120
> > >     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> > >     # ping -c1 192.168.0.2 -t 1
> > >     #
> > >
> > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > > ---
> > >  include/uapi/linux/openvswitch.h |  22 +++++++
> > >  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
> > >  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
> > >  3 files changed, 198 insertions(+)
> > >
> > > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> > > index a87b44cd5590..b6684bc04883 100644
> > > --- a/include/uapi/linux/openvswitch.h
> > > +++ b/include/uapi/linux/openvswitch.h
> > > @@ -927,6 +927,7 @@ enum ovs_action_attr {
> > >         OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
> > >         OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
> > >         OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> > > +       OVS_ACTION_ATTR_DEC_TTL,       /* Nested OVS_DEC_TTL_ATTR_*. */
> > >
> > >         __OVS_ACTION_ATTR_MAX,        /* Nothing past this will be accepted
> > >                                        * from userspace. */
> > > @@ -939,6 +940,23 @@ enum ovs_action_attr {
> > >  };
> > >
> > >  #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
> > > +enum ovs_dec_ttl_attr {
> > > +       OVS_DEC_TTL_ATTR_UNSPEC,
> > > +       OVS_DEC_TTL_ATTR_ACTION_TYPE,    /* Action Type u32 */
> > > +       OVS_DEC_TTL_ATTR_ACTION,         /* nested action */
> > > +       __OVS_DEC_TTL_ATTR_MAX,
> > > +#ifdef __KERNEL__
> > > +       OVS_DEC_TTL_ATTR_ARG          /* struct sample_arg  */
> > > +#endif
> > > +};
> > > +
> >
> > I do not see the need for a type or OVS_DEC_TTL_ACTION_DROP; if there are
> > no nested actions, the datapath can simply drop the packet.
> >
> > > +#ifdef __KERNEL__
> > > +struct dec_ttl_arg {
> > > +       u32 action_type;            /* dec_ttl action type.*/
> > > +};
> > > +#endif
> > > +
> > > +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
> > >
> > >  /* Meters. */
> > >  #define OVS_METER_FAMILY  "ovs_meter"
> > > @@ -1009,6 +1027,10 @@ enum ovs_ct_limit_attr {
> > >         __OVS_CT_LIMIT_ATTR_MAX
> > >  };
> > >
> > > +enum ovs_dec_ttl_action {            /*Actions supported by dec_ttl */
> > > +       OVS_DEC_TTL_ACTION_DROP,
> > > +       OVS_DEC_TTL_ACTION_USER_SPACE
> > > +};
> > >  #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
> > >
> > >  #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
> > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > > index 4c8395462303..5329668732b1 100644
> > > --- a/net/openvswitch/actions.c
> > > +++ b/net/openvswitch/actions.c
> > > @@ -960,6 +960,31 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
> > >         return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
> > >  }
> > >
> > > +static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
> > > +                  struct sw_flow_key *fk, const struct nlattr *attr, bool last)
> > > +{
> > > +       struct nlattr *actions;
> > > +       struct nlattr *dec_ttl_arg;
> > > +       int rem = nla_len(attr);
> > > +       const struct dec_ttl_arg *arg;
> > > +
> > > +       /* The first action is always OVS_DEC_TTL_ATTR_ARG. */
> > > +       dec_ttl_arg = nla_data(attr);
> > > +       arg = nla_data(dec_ttl_arg);
> > > +       actions = nla_next(dec_ttl_arg, &rem);
> > > +
> > > +       switch (arg->action_type) {
> > > +       case OVS_DEC_TTL_ACTION_DROP:
> > > +               consume_skb(skb);
> > > +               break;
> > > +
> > > +       case OVS_DEC_TTL_ACTION_USER_SPACE:
> > > +               return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> > > +
> > >  /* When 'last' is true, sample() should always consume the 'skb'.
> > >   * Otherwise, sample() should keep 'skb' intact regardless what
> > >   * actions are executed within sample().
> > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> > >                              nla_len(actions), last, clone_flow_key);
> > >  }
> > >
> > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > > +{
> > > +       int err;
> > > +
> > > +       if (skb->protocol == htons(ETH_P_IPV6)) {
> > > +               struct ipv6hdr *nh = ipv6_hdr(skb);
> > > +
> > > +               err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > > +                                         sizeof(*nh));
> > There is no need to initialize 'nh', just use 'struct ipv6hdr' to get the size.
>
> But I have to set it later to have nh->hop_limit.
> Do you mean to assign it before the skb_ensure_writable check?
> What differs sizeof(*nh) and sizeof(struct ipv6hdr)? The former will
> work also after a refactor.
>
I meant you can initialize it after the skb_ensure_writable() call, so that
the pointer does not need to be refreshed afterwards.
Matteo Croce Dec. 20, 2019, 12:36 p.m. UTC | #5
On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
<nikolay@cumulusnetworks.com> wrote:
>
> On 17/12/2019 17:51, Matteo Croce wrote:
> > New action to decrement TTL instead of setting it to a fixed value.
> > This action will decrement the TTL and, in case of expired TTL, drop it
> > or execute an action passed via a nested attribute.
> > The default TTL expired action is to drop the packet.
> >
> > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> >
> > Tested with a corresponding change in the userspace:
> >
> >     # ovs-dpctl dump-flows
> >     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> >     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> >     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> >     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> >
> >     # ping -c1 192.168.0.2 -t 42
> >     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> >     # ping -c1 192.168.0.2 -t 120
> >     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> >     # ping -c1 192.168.0.2 -t 1
> >     #
> >
> > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > ---
> >  include/uapi/linux/openvswitch.h |  22 +++++++
> >  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
> >  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
> >  3 files changed, 198 insertions(+)
> >
>
> Hi Matteo,
>
> [snip]
> > +}
> > +
> >  /* When 'last' is true, sample() should always consume the 'skb'.
> >   * Otherwise, sample() should keep 'skb' intact regardless what
> >   * actions are executed within sample().
> > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> >                            nla_len(actions), last, clone_flow_key);
> >  }
> >
> > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > +{
> > +     int err;
> > +
> > +     if (skb->protocol == htons(ETH_P_IPV6)) {
> > +             struct ipv6hdr *nh = ipv6_hdr(skb);
> > +
> > +             err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > +                                       sizeof(*nh));
>
> skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh might become invalid.
> It seems the IPv4 version below is ok as the ptr is reloaded.
>

Right

> One q as I don't know ovs that much - can this action be called only with
> skb->protocol ==  ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not v6, then it must be v4 ?
>

I'm adding a check in validate_and_copy_dec_ttl() so that only IPv4/IPv6
packets will pass.

Thanks,
Tonghao Zhang Dec. 24, 2019, 8:41 a.m. UTC | #6
Hi Matteo,
Do you have plans to implement the TTL decrement action in the userspace
datapath (with DPDK)?
I am doing some research on offloading the TTL decrement action, and
may send a patch for a TTL decrement offload action
using DPDK rte_flow.

On Fri, Dec 20, 2019 at 8:37 PM Matteo Croce <mcroce@redhat.com> wrote:
>
> On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
> <nikolay@cumulusnetworks.com> wrote:
> >
> > On 17/12/2019 17:51, Matteo Croce wrote:
> > > New action to decrement TTL instead of setting it to a fixed value.
> > > This action will decrement the TTL and, in case of expired TTL, drop it
> > > or execute an action passed via a nested attribute.
> > > The default TTL expired action is to drop the packet.
> > >
> > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
> > >
> > > Tested with a corresponding change in the userspace:
> > >
> > >     # ovs-dpctl dump-flows
> > >     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > >     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > >     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
> > >     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
> > >
> > >     # ping -c1 192.168.0.2 -t 42
> > >     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
> > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
> > >     # ping -c1 192.168.0.2 -t 120
> > >     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
> > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
> > >     # ping -c1 192.168.0.2 -t 1
> > >     #
> > >
> > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > > ---
> > >  include/uapi/linux/openvswitch.h |  22 +++++++
> > >  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
> > >  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
> > >  3 files changed, 198 insertions(+)
> > >
> >
> > Hi Matteo,
> >
> > [snip]
> > > +}
> > > +
> > >  /* When 'last' is true, sample() should always consume the 'skb'.
> > >   * Otherwise, sample() should keep 'skb' intact regardless what
> > >   * actions are executed within sample().
> > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
> > >                            nla_len(actions), last, clone_flow_key);
> > >  }
> > >
> > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
> > > +{
> > > +     int err;
> > > +
> > > +     if (skb->protocol == htons(ETH_P_IPV6)) {
> > > +             struct ipv6hdr *nh = ipv6_hdr(skb);
> > > +
> > > +             err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > > +                                       sizeof(*nh));
> >
> > skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh might become invalid.
> > It seems the IPv4 version below is ok as the ptr is reloaded.
> >
>
> Right
>
> > One q as I don't know ovs that much - can this action be called only with
> > skb->protocol ==  ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not v6, then it must be v4 ?
> >
>
> I'm adding a check in validate_and_copy_dec_ttl() so only ipv4/ipv6
> packet will pass.
>
> Thanks,
>
> --
> Matteo Croce
> per aspera ad upstream
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
bindiya Kurle Dec. 24, 2019, 11:16 p.m. UTC | #7
Hi Tonghao,
Once this kernel patch is integrated, I will be submitting a patch to
ovs-dpdk implementing the dec_ttl action on the DPDK datapath.
Regards,
Bindiya

On Tue, Dec 24, 2019 at 2:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com>
wrote:

> Hi Matteo,
> Did you have plan to implement the TTL decrement action in userspace
> datapath(with dpdk),
> I am doing some research offloading about TTL decrement action, and
> may sent patch TTL decrement offload action,
> using dpdk rte_flow.
>
> On Fri, Dec 20, 2019 at 8:37 PM Matteo Croce <mcroce@redhat.com> wrote:
> >
> > On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
> > <nikolay@cumulusnetworks.com> wrote:
> > >
> > > On 17/12/2019 17:51, Matteo Croce wrote:
> > > > New action to decrement TTL instead of setting it to a fixed value.
> > > > This action will decrement the TTL and, in case of expired TTL, drop
> it
> > > > or execute an action passed via a nested attribute.
> > > > The default TTL expired action is to drop the packet.
> > > >
> > > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields,
> respectively.
> > > >
> > > > Tested with a corresponding change in the userspace:
> > > >
> > > >     # ovs-dpctl dump-flows
> > > >     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0,
> used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
> > > >     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0,
> used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
> > > >     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0,
> used:never, actions:2
> > > >     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0,
> used:never, actions:1
> > > >
> > > >     # ping -c1 192.168.0.2 -t 42
> > > >     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP
> (1), length 84)
> > > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq
> 1, length 64
> > > >     # ping -c1 192.168.0.2 -t 120
> > > >     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP
> (1), length 84)
> > > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq
> 1, length 64
> > > >     # ping -c1 192.168.0.2 -t 1
> > > >     #
> > > >
> > > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
> > > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
> > > > ---
> > > >  include/uapi/linux/openvswitch.h |  22 +++++++
> > > >  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
> > > >  net/openvswitch/flow_netlink.c   | 105
> +++++++++++++++++++++++++++++++
> > > >  3 files changed, 198 insertions(+)
> > > >
> > >
> > > Hi Matteo,
> > >
> > > [snip]
> > > > +}
> > > > +
> > > >  /* When 'last' is true, sample() should always consume the 'skb'.
> > > >   * Otherwise, sample() should keep 'skb' intact regardless what
> > > >   * actions are executed within sample().
> > > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct
> datapath *dp, struct sk_buff *skb,
> > > >                            nla_len(actions), last, clone_flow_key);
> > > >  }
> > > >
> > > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key
> *key)
> > > > +{
> > > > +     int err;
> > > > +
> > > > +     if (skb->protocol == htons(ETH_P_IPV6)) {
> > > > +             struct ipv6hdr *nh = ipv6_hdr(skb);
> > > > +
> > > > +             err = skb_ensure_writable(skb, skb_network_offset(skb)
> +
> > > > +                                       sizeof(*nh));
> > >
> > > skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh
> might become invalid.
> > > It seems the IPv4 version below is ok as the ptr is reloaded.
> > >
> >
> > Right
> >
> > > One q as I don't know ovs that much - can this action be called only
> with
> > > skb->protocol ==  ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not
> v6, then it must be v4 ?
> > >
> >
> > I'm adding a check in validate_and_copy_dec_ttl() so only ipv4/ipv6
> > packet will pass.
> >
> > Thanks,
> >
> > --
> > Matteo Croce
> > per aspera ad upstream
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Tonghao Zhang Dec. 25, 2019, 3:26 a.m. UTC | #8
On Wed, Dec 25, 2019 at 7:16 AM bindiya Kurle <bindiyakurle@gmail.com> wrote:
>
> Hi Tonghao,
> Once this kernel patch is integrated . I will be submitting patch to ovs-dpdk  for implementing dec_ttl action on dpdk datapath.
Good, thanks
> Regards,
> Bindiya
>
> On Tue, Dec 24, 2019 at 2:12 PM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>>
>> Hi Matteo,
>> Did you have plan to implement the TTL decrement action in userspace
>> datapath(with dpdk),
>> I am doing some research offloading about TTL decrement action, and
>> may sent patch TTL decrement offload action,
>> using dpdk rte_flow.
>>
>> On Fri, Dec 20, 2019 at 8:37 PM Matteo Croce <mcroce@redhat.com> wrote:
>> >
>> > On Tue, Dec 17, 2019 at 5:30 PM Nikolay Aleksandrov
>> > <nikolay@cumulusnetworks.com> wrote:
>> > >
>> > > On 17/12/2019 17:51, Matteo Croce wrote:
>> > > > New action to decrement TTL instead of setting it to a fixed value.
>> > > > This action will decrement the TTL and, in case of expired TTL, drop it
>> > > > or execute an action passed via a nested attribute.
>> > > > The default TTL expired action is to drop the packet.
>> > > >
>> > > > Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively.
>> > > >
>> > > > Tested with a corresponding change in the userspace:
>> > > >
>> > > >     # ovs-dpctl dump-flows
>> > > >     in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,1
>> > > >     in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1,2
>> > > >     in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2
>> > > >     in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1
>> > > >
>> > > >     # ping -c1 192.168.0.2 -t 42
>> > > >     IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84)
>> > > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64
>> > > >     # ping -c1 192.168.0.2 -t 120
>> > > >     IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84)
>> > > >         192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64
>> > > >     # ping -c1 192.168.0.2 -t 1
>> > > >     #
>> > > >
>> > > > Co-authored-by: Bindiya Kurle <bindiyakurle@gmail.com>
>> > > > Signed-off-by: Bindiya Kurle <bindiyakurle@gmail.com>
>> > > > Signed-off-by: Matteo Croce <mcroce@redhat.com>
>> > > > ---
>> > > >  include/uapi/linux/openvswitch.h |  22 +++++++
>> > > >  net/openvswitch/actions.c        |  71 +++++++++++++++++++++
>> > > >  net/openvswitch/flow_netlink.c   | 105 +++++++++++++++++++++++++++++++
>> > > >  3 files changed, 198 insertions(+)
>> > > >
>> > >
>> > > Hi Matteo,
>> > >
>> > > [snip]
>> > > > +}
>> > > > +
>> > > >  /* When 'last' is true, sample() should always consume the 'skb'.
>> > > >   * Otherwise, sample() should keep 'skb' intact regardless what
>> > > >   * actions are executed within sample().
>> > > > @@ -1176,6 +1201,44 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
>> > > >                            nla_len(actions), last, clone_flow_key);
>> > > >  }
>> > > >
>> > > > +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
>> > > > +{
>> > > > +     int err;
>> > > > +
>> > > > +     if (skb->protocol == htons(ETH_P_IPV6)) {
>> > > > +             struct ipv6hdr *nh = ipv6_hdr(skb);
>> > > > +
>> > > > +             err = skb_ensure_writable(skb, skb_network_offset(skb) +
>> > > > +                                       sizeof(*nh));
>> > >
>> > > skb_ensure_writable() calls pskb_may_pull() which may reallocate so nh might become invalid.
>> > > It seems the IPv4 version below is ok as the ptr is reloaded.
>> > >
>> >
>> > Right
>> >
>> > > One q as I don't know ovs that much - can this action be called only with
>> > > skb->protocol ==  ETH_P_IP/IPV6 ? I.e. Are we sure that if it's not v6, then it must be v4 ?
>> > >
>> >
>> > I'm adding a check in validate_and_copy_dec_ttl() so only ipv4/ipv6
>> > packet will pass.
>> >
>> > Thanks,
>> >
>> > --
>> > Matteo Croce
>> > per aspera ad upstream
>> >
>> > _______________________________________________
>> > dev mailing list
>> > dev@openvswitch.org
>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index a87b44cd5590..b6684bc04883 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -927,6 +927,7 @@  enum ovs_action_attr {
 	OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
 	OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
 	OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
+	OVS_ACTION_ATTR_DEC_TTL,       /* Nested OVS_DEC_TTL_ATTR_*. */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
@@ -939,6 +940,23 @@  enum ovs_action_attr {
 };
 
 #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+enum ovs_dec_ttl_attr {
+	OVS_DEC_TTL_ATTR_UNSPEC,
+	OVS_DEC_TTL_ATTR_ACTION_TYPE,    /* u32 enum ovs_dec_ttl_action. */
+	OVS_DEC_TTL_ATTR_ACTION,         /* Nested OVS_ACTION_ATTR_*. */
+	__OVS_DEC_TTL_ATTR_MAX,
+#ifdef __KERNEL__
+	OVS_DEC_TTL_ATTR_ARG          /* struct dec_ttl_arg, kernel-internal. */
+#endif
+};
+
+#ifdef __KERNEL__
+struct dec_ttl_arg {
+	u32 action_type;            /* enum ovs_dec_ttl_action value. */
+};
+#endif
+
+#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1)
 
 /* Meters. */
 #define OVS_METER_FAMILY  "ovs_meter"
@@ -1009,6 +1027,10 @@  enum ovs_ct_limit_attr {
 	__OVS_CT_LIMIT_ATTR_MAX
 };
 
+enum ovs_dec_ttl_action {            /* What dec_ttl does on expired TTL. */
+	OVS_DEC_TTL_ACTION_DROP,		/* Drop the packet. */
+	OVS_DEC_TTL_ACTION_USER_SPACE		/* Run the nested actions. */
+};
 #define OVS_CT_LIMIT_ATTR_MAX (__OVS_CT_LIMIT_ATTR_MAX - 1)
 
 #define OVS_ZONE_LIMIT_DEFAULT_ZONE -1
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4c8395462303..5329668732b1 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -960,6 +960,31 @@  static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
 }
 
+/* Handle a packet whose TTL/hop limit has expired.  Runs the nested actions
+ * for OVS_DEC_TTL_ACTION_USER_SPACE and drops the packet otherwise.  Unknown
+ * action types are dropped as well: do_execute_actions() returns right after
+ * this call without freeing the skb, so it must always be consumed here.
+ */
+static int dec_ttl(struct datapath *dp, struct sk_buff *skb,
+		   struct sw_flow_key *fk, const struct nlattr *attr, bool last)
+{
+	/* The first attribute is always OVS_DEC_TTL_ATTR_ARG. */
+	struct nlattr *dec_ttl_arg = nla_data(attr);
+	const struct dec_ttl_arg *arg = nla_data(dec_ttl_arg);
+	struct nlattr *actions;
+	int rem = nla_len(attr);
+
+	actions = nla_next(dec_ttl_arg, &rem);
+
+	if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE)
+		return clone_execute(dp, skb, fk, 0, actions, rem, last, false);
+
+	/* OVS_DEC_TTL_ACTION_DROP and anything unrecognised. */
+	consume_skb(skb);
+
+	return 0;
+}
+
 /* When 'last' is true, sample() should always consume the 'skb'.
  * Otherwise, sample() should keep 'skb' intact regardless what
  * actions are executed within sample().
@@ -1176,6 +1201,44 @@  static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
 			     nla_len(actions), last, clone_flow_key);
 }
 
+/* Decrement the IPv4 TTL or IPv6 hop limit and mirror it in key->ip.ttl.
+ * Headers are fetched after skb_ensure_writable(), which may move them.
+ * Returns -EHOSTUNREACH without decrementing if the value is already <= 1.
+ */
+static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	bool ipv6 = skb->protocol == htons(ETH_P_IPV6);
+	int err;
+
+	err = skb_ensure_writable(skb, skb_network_offset(skb) +
+				  (ipv6 ? sizeof(struct ipv6hdr) :
+					  sizeof(struct iphdr)));
+	if (unlikely(err))
+		return err;
+
+	if (ipv6) {
+		struct ipv6hdr *nh = ipv6_hdr(skb);
+
+		if (nh->hop_limit <= 1)
+			return -EHOSTUNREACH;
+
+		key->ip.ttl = --nh->hop_limit;
+	} else {
+		struct iphdr *nh = ip_hdr(skb);
+		u8 old_ttl;
+
+		if (nh->ttl <= 1)
+			return -EHOSTUNREACH;
+
+		old_ttl = nh->ttl--;
+		csum_replace2(&nh->check, htons(old_ttl << 8),
+			      htons(nh->ttl << 8));
+		key->ip.ttl = nh->ttl;
+	}
+
+	return 0;
+}
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			      struct sw_flow_key *key,
@@ -1347,6 +1410,14 @@  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 			break;
 		}
+
+		case OVS_ACTION_ATTR_DEC_TTL:
+			err = execute_dec_ttl(skb, key);
+			if (err == -EHOSTUNREACH) {
+				err = dec_ttl(dp, skb, key, a, true);
+				return err;
+			}
+			break;
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 65c2e3458ff5..a9eea2ffb8b0 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -61,6 +61,7 @@  static bool actions_may_change_flow(const struct nlattr *actions)
 		case OVS_ACTION_ATTR_RECIRC:
 		case OVS_ACTION_ATTR_TRUNC:
 		case OVS_ACTION_ATTR_USERSPACE:
+		case OVS_ACTION_ATTR_DEC_TTL:
 			break;
 
 		case OVS_ACTION_ATTR_CT:
@@ -2494,6 +2495,59 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	return 0;
 }
 
+/* Validate a nested OVS_ACTION_ATTR_DEC_TTL attribute and append its
+ * internal form to '*sfa': an OVS_DEC_TTL_ATTR_ARG holding a
+ * struct dec_ttl_arg, followed, for OVS_DEC_TTL_ACTION_USER_SPACE, by the
+ * validated copy of the actions nested in OVS_DEC_TTL_ATTR_ACTION.
+ *
+ * Returns 0 on success, -EINVAL for malformed input (duplicate or
+ * out-of-range attribute, missing or wrongly sized action type, unknown
+ * action type, missing action list), or the error of the helper that failed.
+ */
+static int validate_and_copy_dec_ttl(struct net *net, const struct nlattr *attr,
+				     const struct sw_flow_key *key,
+				     struct sw_flow_actions **sfa,
+				     __be16 eth_type, __be16 vlan_tci,
+				     u32 mpls_label_count, bool log)
+{
+	struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1] = { 0 };
+	const struct nlattr *action_type, *action;
+	struct nlattr *a;
+	int rem, start, err;
+	struct dec_ttl_arg arg;
+
+	/* Index the nested attributes by type; each may appear only once. */
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+
+		if (!type || type > OVS_DEC_TTL_ATTR_MAX || attrs[type])
+			return -EINVAL;
+
+		attrs[type] = a;
+	}
+	if (rem)
+		return -EINVAL;
+
+	action_type = attrs[OVS_DEC_TTL_ATTR_ACTION_TYPE];
+	if (!action_type || nla_len(action_type) != sizeof(u32))
+		return -EINVAL;
+
+	/* Only accept the action types that are handled when the flow is
+	 * executed; anything else is refused before it is stored.
+	 */
+	arg.action_type = nla_get_u32(action_type);
+	if (arg.action_type != OVS_DEC_TTL_ACTION_DROP &&
+	    arg.action_type != OVS_DEC_TTL_ACTION_USER_SPACE) {
+		OVS_NLERR(log, "Unknown dec_ttl action type %u",
+			  arg.action_type);
+		return -EINVAL;
+	}
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
+	if (start < 0)
+		return start;
+
+	err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ARG,
+				 &arg, sizeof(arg), log);
+	if (err)
+		return err;
+
+	if (arg.action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
+		action = attrs[OVS_DEC_TTL_ATTR_ACTION];
+		/* An empty action list is allowed, a truncated one is not. */
+		if (!action || (nla_len(action) && nla_len(action) < NLA_HDRLEN))
+			return -EINVAL;
+
+		err = __ovs_nla_copy_actions(net, action, key, sfa, eth_type,
+					     vlan_tci, mpls_label_count, log);
+		if (err)
+			return err;
+	}
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
 static int validate_and_copy_clone(struct net *net,
 				   const struct nlattr *attr,
 				   const struct sw_flow_key *key,
@@ -3005,6 +3059,7 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
 			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
 			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
+			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3233,6 +3288,15 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			break;
 		}
 
+		case OVS_ACTION_ATTR_DEC_TTL:
+			err = validate_and_copy_dec_ttl(net, a, key, sfa,
+							eth_type, vlan_tci,
+							mpls_label_count, log);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3404,6 +3468,41 @@  static int check_pkt_len_action_to_attr(const struct nlattr *attr,
 	return err;
 }
 
+/* Serialise the internal form of a dec_ttl action ('att') into 'skb' as a
+ * nested OVS_ACTION_ATTR_DEC_TTL attribute: the action type is emitted as
+ * OVS_DEC_TTL_ATTR_ACTION_TYPE and, for OVS_DEC_TTL_ACTION_USER_SPACE, the
+ * stored actions are emitted inside a nested OVS_DEC_TTL_ATTR_ACTION.
+ *
+ * Returns 0 on success, -EMSGSIZE when 'skb' has no room left, or the error
+ * from ovs_nla_put_actions().  On failure the partially written attribute
+ * is removed from 'skb' again.
+ */
+static int dec_ttl_action_to_attr(const struct nlattr *att, struct sk_buff *skb)
+{
+	struct nlattr *start, *ac_start, *arg_attr;
+	int err = -EMSGSIZE, rem = nla_len(att);
+	const struct dec_ttl_arg *arg;
+	struct nlattr *actions;
+
+	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL);
+	if (!start)
+		return -EMSGSIZE;
+
+	/* The argument comes first; the stored actions follow it. */
+	arg_attr = nla_data(att);
+	arg = nla_data(arg_attr);
+	actions = nla_next(arg_attr, &rem);
+
+	if (nla_put_u32(skb, OVS_DEC_TTL_ATTR_ACTION_TYPE, arg->action_type))
+		goto cancel;
+
+	if (arg->action_type == OVS_DEC_TTL_ACTION_USER_SPACE) {
+		ac_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION);
+		if (!ac_start)
+			goto cancel;
+
+		err = ovs_nla_put_actions(actions, rem, skb);
+		if (err)
+			goto cancel;
+
+		nla_nest_end(skb, ac_start);
+	}
+
+	nla_nest_end(skb, start);
+
+	return 0;
+
+cancel:
+	/* Cancelling the outer nest trims the message back to 'start', which
+	 * also discards anything already written inside it.
+	 */
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3504,6 +3603,12 @@  int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_DEC_TTL:
+			err = dec_ttl_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;