[ovs-dev,v2,2/6] datapath: Add a new action check_pkt_len

Message ID 20190412130537.28478-1-nusiddiq@redhat.com
State Superseded
Headers show
Series
  • Address MTU issue for larger packets in OVN
Related show

Commit Message

Numan Siddique April 12, 2019, 1:05 p.m.
From: Numan Siddique <nusiddiq@redhat.com>

Upstream commit:
    commit 4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9
    Author: Numan Siddique <nusiddiq@redhat.com>
    Date:   Tue Mar 26 06:13:46 2019 +0530

    net: openvswitch: Add a new action check_pkt_len

    This patch adds a new action - 'check_pkt_len' which checks the
    packet length and executes a set of actions if the packet
    length is greater than the specified length or executes
    another set of actions if the packet length is lesser or equal to.

    This action takes below nlattrs
      * OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for

      * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER - Nested actions
        to apply if the packet length is greater than the specified 'pkt_len'

      * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL - Nested
        actions to apply if the packet length is lesser or equal to the
        specified 'pkt_len'.

    The main use case for adding this action is to solve the packet
    drops because of MTU mismatch in OVN virtual networking solution.
    When a VM (which belongs to a logical switch of OVN) sends a packet
    destined to go via the gateway router and if the nic which provides
    external connectivity, has a lesser MTU, OVS drops the packet
    if the packet length is greater than this MTU.

    With the help of this action, OVN will check the packet length
    and if it is greater than the MTU size, it will generate an
    ICMP packet (type 3, code 4) and includes the next hop mtu in it
    so that the sender can fragment the packets.

    Reported-at:
    https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html
    Suggested-by: Ben Pfaff <blp@ovn.org>
    Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
    CC: Gregory Rose <gvrose8192@gmail.com>
    CC: Pravin B Shelar <pshelar@ovn.org>
    Acked-by: Pravin B Shelar <pshelar@ovn.org>
    Tested-by: Greg Rose <gvrose8192@gmail.com>
    Reviewed-by: Greg Rose <gvrose8192@gmail.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>

Use of 'nla_parse_strict()' (in validate_and_copy_check_len()) is available
only in recent kernels. So changed it to 'nla_parse_nested()'.

Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
---
 datapath/actions.c                            |  44 +++++
 datapath/flow_netlink.c                       | 171 ++++++++++++++++++
 .../linux/compat/include/linux/openvswitch.h  |  17 ++
 3 files changed, 232 insertions(+)

Patch

diff --git a/datapath/actions.c b/datapath/actions.c
index 8abe70aa5..f5db12389 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -1219,6 +1219,40 @@  static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
 	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
 }
 
+static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
+				 struct sw_flow_key *key,
+				 const struct nlattr *attr, bool last)
+{
+	const struct nlattr *actions, *cpl_arg;
+	const struct check_pkt_len_arg *arg;
+	int rem = nla_len(attr);
+	bool clone_flow_key;
+
+	/* The first netlink attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
+	 */
+	cpl_arg = nla_data(attr);
+	arg = nla_data(cpl_arg);
+
+	if (skb->len <= arg->pkt_len) {
+		/* Second netlink attribute in 'attr' is always
+		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+		 */
+		actions = nla_next(cpl_arg, &rem);
+		clone_flow_key = !arg->exec_for_lesser_equal;
+	} else {
+		/* Third netlink attribute in 'attr' is always
+		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
+		 */
+		actions = nla_next(cpl_arg, &rem);
+		actions = nla_next(actions, &rem);
+		clone_flow_key = !arg->exec_for_greater;
+	}
+
+	return clone_execute(dp, skb, key, 0, nla_data(actions),
+			     nla_len(actions), last, clone_flow_key);
+}
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			      struct sw_flow_key *key,
@@ -1379,6 +1413,16 @@  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				return err;
 			break;
 		}
+
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
+                        bool last = nla_is_last(a, rem);
+
+                        err = execute_check_pkt_len(dp, skb, key, a, last);
+                        if (last)
+                                return err;
+
+                        break;
+                }
 		}
 
 		if (unlikely(err)) {
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index e5e469a84..f915fc0d2 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -93,6 +93,7 @@  static bool actions_may_change_flow(const struct nlattr *actions)
 		case OVS_ACTION_ATTR_SET:
 		case OVS_ACTION_ATTR_SET_MASKED:
 		case OVS_ACTION_ATTR_METER:
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
 		default:
 			return true;
 		}
@@ -2846,6 +2847,87 @@  static int validate_userspace(const struct nlattr *attr)
 	return 0;
 }
 
+static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
+	[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
+	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
+	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
+};
+
+static int validate_and_copy_check_pkt_len(struct net *net,
+					   const struct nlattr *attr,
+					   const struct sw_flow_key *key,
+					   struct sw_flow_actions **sfa,
+					   __be16 eth_type, __be16 vlan_tci,
+					   bool log, bool last)
+{
+	const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
+	struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
+	struct check_pkt_len_arg arg;
+	int nested_acts_start;
+	int start, err;
+
+	err = nla_parse_nested(a, OVS_CHECK_PKT_LEN_ATTR_MAX, attr,
+			       cpl_policy, NULL);
+	if (err)
+		return err;
+
+	if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
+	    !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
+		return -EINVAL;
+
+	acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
+	acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
+
+	/* Both the nested action should be present. */
+	if (!acts_if_greater || !acts_if_lesser_eq)
+		return -EINVAL;
+
+	/* validation done, copy the nested actions. */
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
+					log);
+	if (start < 0)
+		return start;
+
+	arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
+	arg.exec_for_lesser_equal =
+		last || !actions_may_change_flow(acts_if_lesser_eq);
+	arg.exec_for_greater =
+		last || !actions_may_change_flow(acts_if_greater);
+
+	err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
+				 sizeof(arg), log);
+	if (err)
+		return err;
+
+	nested_acts_start = add_nested_action_start(sfa,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
+	if (nested_acts_start < 0)
+		return nested_acts_start;
+
+	err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
+				     eth_type, vlan_tci, log);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, nested_acts_start);
+
+	nested_acts_start = add_nested_action_start(sfa,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
+	if (nested_acts_start < 0)
+		return nested_acts_start;
+
+	err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
+				     eth_type, vlan_tci, log);
+
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, nested_acts_start);
+	add_nested_action_end(*sfa, start);
+	return 0;
+}
+
 static int copy_action(const struct nlattr *from,
 		       struct sw_flow_actions **sfa, bool log)
 {
@@ -2892,6 +2974,7 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_POP_NSH] = 0,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
 			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
+			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3093,6 +3176,19 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			break;
 		}
 
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
+                        bool last = nla_is_last(a, rem);
+
+                        err = validate_and_copy_check_pkt_len(net, a, key, sfa,
+                                                              eth_type,
+                                                              vlan_tci, log,
+                                                              last);
+                        if (err)
+                                return err;
+                        skip_copy = true;
+                        break;
+                }
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3191,6 +3287,75 @@  static int clone_action_to_attr(const struct nlattr *attr,
 	return err;
 }
 
+static int check_pkt_len_action_to_attr(const struct nlattr *attr,
+					struct sk_buff *skb)
+{
+	struct nlattr *start, *ac_start = NULL;
+	const struct check_pkt_len_arg *arg;
+	const struct nlattr *a, *cpl_arg;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN);
+	if (!start)
+		return -EMSGSIZE;
+
+	/* The first nested attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
+	 */
+	cpl_arg = nla_data(attr);
+	arg = nla_data(cpl_arg);
+
+	if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	/* Second nested attribute in 'attr' is always
+	 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+	 */
+	a = nla_next(cpl_arg, &rem);
+	ac_start =  nla_nest_start(skb,
+		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		goto out;
+	} else {
+		nla_nest_end(skb, ac_start);
+	}
+
+	/* Third nested attribute in 'attr' is always
+	 * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER.
+	 */
+	a = nla_next(a, &rem);
+	ac_start =  nla_nest_start(skb,
+				   OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER);
+	if (!ac_start) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+	if (err) {
+		nla_nest_cancel(skb, ac_start);
+		goto out;
+	} else {
+		nla_nest_end(skb, ac_start);
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+
+out:
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3285,6 +3450,12 @@  int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+                        err = check_pkt_len_action_to_attr(a, skb);
+                        if (err)
+                                return err;
+                        break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index ce364e577..65a003a62 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -877,10 +877,27 @@  enum ovs_check_pkt_len_attr {
 	OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER,
 	OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL,
 	__OVS_CHECK_PKT_LEN_ATTR_MAX,
+
+#ifdef __KERNEL__
+	OVS_CHECK_PKT_LEN_ATTR_ARG          /* struct check_pkt_len_arg  */
+#endif
 };
 
 #define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1)
 
+#ifdef __KERNEL__
+struct check_pkt_len_arg {
+        u16 pkt_len;    /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN'. */
+        bool exec_for_greater;  /* When true, actions in IF_GREATE will
+                                 * not change flow keys. False otherwise.
+                                 */
+        bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL
+                                     * will not change flow keys. False
+                                     * otherwise.
+                                     */
+};
+#endif
+
 /**
  * enum ovs_action_attr - Action types.
  *