diff mbox series

[net-next,v16,5/8] sch_cake: Add NAT awareness to packet classifier

Message ID 152751766690.30935.18178441475189968162.stgit@alrua-kau
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series sched: Add Common Applications Kept Enhanced (cake) qdisc | expand

Commit Message

Toke Høiland-Jørgensen May 28, 2018, 2:27 p.m. UTC
When CAKE is deployed on a gateway that also performs NAT (which is a
common deployment mode), the host fairness mechanism cannot distinguish
internal hosts from each other, and so fails to work correctly.

To fix this, we add an optional NAT awareness mode, which will query the
kernel conntrack mechanism to obtain the pre-NAT addresses for each packet
and use that in the flow and host hashing.

When the shaper is enabled and the host is already performing NAT, the cost
of this lookup is negligible. However, in unlimited mode with no NAT being
performed, there is a significant CPU cost at higher bandwidths. For this
reason, the feature is turned off by default.

Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
---
 net/sched/sch_cake.c |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Pablo Neira Ayuso May 28, 2018, 7:51 p.m. UTC | #1
On Mon, May 28, 2018 at 04:27:46PM +0200, Toke Høiland-Jørgensen wrote:
> When CAKE is deployed on a gateway that also performs NAT (which is a
> common deployment mode), the host fairness mechanism cannot distinguish
> internal hosts from each other, and so fails to work correctly.
> 
> To fix this, we add an optional NAT awareness mode, which will query the
> kernel conntrack mechanism to obtain the pre-NAT addresses for each packet
> and use that in the flow and host hashing.
> 
> When the shaper is enabled and the host is already performing NAT, the cost
> of this lookup is negligible. However, in unlimited mode with no NAT being
> performed, there is a significant CPU cost at higher bandwidths. For this
> reason, the feature is turned off by default.
> 
> Cc: netfilter-devel@vger.kernel.org
> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
> ---
>  net/sched/sch_cake.c |   46 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 46 insertions(+)
> 
> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> index 68ac908470f1..fecd9caac0cc 100644
> --- a/net/sched/sch_cake.c
> +++ b/net/sched/sch_cake.c
> @@ -71,6 +71,10 @@
>  #include <net/tcp.h>
>  #include <net/flow_dissector.h>
>  
> +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
> +#include <net/netfilter/nf_conntrack_core.h>
> +#endif
> +
>  #define CAKE_SET_WAYS (8)
>  #define CAKE_MAX_TINS (8)
>  #define CAKE_QUEUES (1024)
> @@ -516,6 +520,29 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
>  	return drop;
>  }
>  
> +static void cake_update_flowkeys(struct flow_keys *keys,
> +				 const struct sk_buff *skb)
> +{
> +#if IS_ENABLED(CONFIG_NF_CONNTRACK)

I would remove the ifdef, not really needed, it will simplify things.

But I leave it to you to decide, this is not I deal breaker.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toke Høiland-Jørgensen May 28, 2018, 10:19 p.m. UTC | #2
Pablo Neira Ayuso <pablo@netfilter.org> writes:

> On Mon, May 28, 2018 at 04:27:46PM +0200, Toke Høiland-Jørgensen wrote:
>> When CAKE is deployed on a gateway that also performs NAT (which is a
>> common deployment mode), the host fairness mechanism cannot distinguish
>> internal hosts from each other, and so fails to work correctly.
>> 
>> To fix this, we add an optional NAT awareness mode, which will query the
>> kernel conntrack mechanism to obtain the pre-NAT addresses for each packet
>> and use that in the flow and host hashing.
>> 
>> When the shaper is enabled and the host is already performing NAT, the cost
>> of this lookup is negligible. However, in unlimited mode with no NAT being
>> performed, there is a significant CPU cost at higher bandwidths. For this
>> reason, the feature is turned off by default.
>> 
>> Cc: netfilter-devel@vger.kernel.org
>> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> ---
>>  net/sched/sch_cake.c |   46 ++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 46 insertions(+)
>> 
>> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
>> index 68ac908470f1..fecd9caac0cc 100644
>> --- a/net/sched/sch_cake.c
>> +++ b/net/sched/sch_cake.c
>> @@ -71,6 +71,10 @@
>>  #include <net/tcp.h>
>>  #include <net/flow_dissector.h>
>>  
>> +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
>> +#include <net/netfilter/nf_conntrack_core.h>
>> +#endif
>> +
>>  #define CAKE_SET_WAYS (8)
>>  #define CAKE_MAX_TINS (8)
>>  #define CAKE_QUEUES (1024)
>> @@ -516,6 +520,29 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
>>  	return drop;
>>  }
>>  
>> +static void cake_update_flowkeys(struct flow_keys *keys,
>> +				 const struct sk_buff *skb)
>> +{
>> +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
>
> I would remove the ifdef, not really needed, it will simplify things.
>
> But I leave it to you to decide, this is not I deal breaker.

If I remove it I get a bunch of 'incomplete type' errors when compiling.
Besides, we use it to report an error to userspace when conntrack is
disabled anyway, so might as well keep the whole thing ifdef'ed.

-Toke
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox series

Patch

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 68ac908470f1..fecd9caac0cc 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -71,6 +71,10 @@ 
 #include <net/tcp.h>
 #include <net/flow_dissector.h>
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
 #define CAKE_SET_WAYS (8)
 #define CAKE_MAX_TINS (8)
 #define CAKE_QUEUES (1024)
@@ -516,6 +520,29 @@  static bool cobalt_should_drop(struct cobalt_vars *vars,
 	return drop;
 }
 
+static void cake_update_flowkeys(struct flow_keys *keys,
+				 const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	struct nf_conntrack_tuple tuple = {};
+	bool rev = !skb->_nfct;
+
+	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+		return;
+
+	if (!nf_ct_get_tuple_skb(&tuple, skb))
+		return;
+
+	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+
+	if (keys->ports.ports) {
+		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+	}
+#endif
+}
+
 /* Cake has several subtle multiple bit settings. In these cases you
  *  would be matching triple isolate mode as well.
  */
@@ -543,6 +570,9 @@  static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 	skb_flow_dissect_flow_keys(skb, &keys,
 				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
 
+	if (flow_mode & CAKE_FLOW_NAT_FLAG)
+		cake_update_flowkeys(&keys, skb);
+
 	/* flow_hash_from_keys() sorts the addresses by value, so we have
 	 * to preserve their order in a separate data structure to treat
 	 * src and dst host addresses as independently selectable.
@@ -1919,6 +1949,18 @@  static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	if (err < 0)
 		return err;
 
+	if (tb[TCA_CAKE_NAT]) {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+		q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+		q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+			!!nla_get_u32(tb[TCA_CAKE_NAT]);
+#else
+		NL_SET_ERR_MSG_ATTR(extack, "No conntrack support in kernel",
+				    tb[TCA_CAKE_NAT]);
+		return -EOPNOTSUPP;
+#endif
+	}
+
 	if (tb[TCA_CAKE_BASE_RATE64])
 		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
 
@@ -2091,6 +2133,10 @@  static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_CAKE_NAT,
+			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure: