Patchwork [v3,kernel,24/29] add packets and bytes mark capability to nfacct

login
register
mail settings
Submitter Michael Zintakis
Date July 10, 2013, 6:25 p.m.
Message ID <1373480727-11254-25-git-send-email-michael.zintakis@googlemail.com>
Download mbox | patch
Permalink /patch/258212/
State Not Applicable
Headers show

Comments

Michael Zintakis - July 10, 2013, 6:25 p.m.
* add two variables to each nfacct object - 'pmark' and 'bmark', allowing
short-term traffic accounting to be implemented by placing a "mark" against
that object.

This enables counting of traffic (both bytes and packets) since that mark has
been enabled/set, in addition to the main packet and byte counters.

Signed-off-by: Michael Zintakis <michael.zintakis@googlemail.com>
---
 include/uapi/linux/netfilter/nfnetlink_acct.h |  8 +++-
 net/netfilter/nfnetlink_acct.c                | 56 +++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)
Florian Westphal - July 10, 2013, 8:01 p.m.
Michael Zintakis <michael.zintakis@googlemail.com> wrote:
> * add two variables to each nfacct object - 'pmark' and 'bmark', allowing
> short-term traffic accounting to be implemented by placing a "mark" against
> that object.
> 
> This enables counting of traffic (both bytes and packets) since that mark has
> been enabled/set, in addition to the main packet and byte counters.

And again.
Why not simply add a 2nd accounting object, and then send traffic to
it based on ruleset? (-m time, -m condition, -m set, etc.)?

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira - July 11, 2013, 1:14 a.m.
On Wed, Jul 10, 2013 at 07:25:22PM +0100, Michael Zintakis wrote:
> * add two variables to each nfacct object - 'pmark' and 'bmark', allowing
> short-term traffic accounting to be implemented by placing a "mark" against
> that object.
> 
> This enables counting of traffic (both bytes and packets) since that mark has
> been enabled/set, in addition to the main packet and byte counters.
> 
> Signed-off-by: Michael Zintakis <michael.zintakis@googlemail.com>
> ---
>  include/uapi/linux/netfilter/nfnetlink_acct.h |  8 +++-
>  net/netfilter/nfnetlink_acct.c                | 56 +++++++++++++++++++++++++++
>  2 files changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
> index 18cd28e..809fa35 100644
> --- a/net/netfilter/nfnetlink_acct.c
> +++ b/net/netfilter/nfnetlink_acct.c
> @@ -33,6 +33,8 @@ struct nf_acct {
>  	atomic64_t		pkts;
>  	atomic64_t		bytes;
>  	u64			bthr;
> +	u64			pmark;
> +	u64			bmark;
>  	u16			fmt;
>  	u16			flags;
>  	struct list_head	head;

Oh my...

You insist on your idea of using the kernel as a database to simplify
your user-space program. All these fields are set/unset from
userspace, they are not altered by packets at all. This does not
belong here.

> @@ -61,6 +63,10 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>  		cmd = be16_to_cpu(nla_get_be16(tb[NFACCT_CMD]));
>  		flags = be16_to_cpu(nla_get_be16(tb[NFACCT_FLAGS]));
>  
> +		if (!(cmd & NFACCT_FLAG_MARK) &&
> +		     (tb[NFACCT_PMARK] || tb[NFACCT_BMARK]))
> +			return -EINVAL;
> +
>  		if (cmd & NFACCT_FLAG_BTHR &&
>  		    ((flags & NFACCT_FLAG_BTHR && !tb[NFACCT_BTHR]) ||
>  		     (!(flags & NFACCT_FLAG_BTHR) && tb[NFACCT_BTHR])))
> @@ -114,6 +120,25 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>  				matching->fmt =
>  				be16_to_cpu(nla_get_be16(tb[NFACCT_FMT]));
>  			}
> +			/* ...then set the mark flag... */
> +			if (cmd & NFACCT_FLAG_MARK) {
> +				if (flags & NFACCT_FLAG_MARK) {
> +					matching->pmark = tb[NFACCT_PMARK] ?
> +					be64_to_cpu(
> +					  nla_get_be64(tb[NFACCT_PMARK])) :
> +					atomic64_read(&matching->pkts);
> +
> +					matching->bmark = tb[NFACCT_BMARK] ?
> +					be64_to_cpu(
> +					  nla_get_be64(tb[NFACCT_BMARK])) :
> +					atomic64_read(&matching->bytes);
> +					matching->flags |= NFACCT_FLAG_MARK;
> +				} else {
> +					matching->pmark = 0;
> +					matching->bmark = 0;
> +					matching->flags &= ~NFACCT_FLAG_MARK;
> +				}
> +			}
>  			/* ... and finally set the bytes threshold */
>  			if (cmd & NFACCT_FLAG_BTHR) {
>  				if (flags & NFACCT_FLAG_BTHR) {
> @@ -147,6 +172,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>  	if (tb[NFACCT_FMT]) {
>  		nfacct->fmt = be16_to_cpu(nla_get_be16(tb[NFACCT_FMT]));
>  	}
> +	if (cmd & NFACCT_FLAG_MARK && flags & NFACCT_FLAG_MARK) {
> +		if (tb[NFACCT_PMARK])
> +			nfacct->pmark = be64_to_cpu(
> +				nla_get_be64(tb[NFACCT_PMARK]));
> +		if (tb[NFACCT_BMARK])
> +			nfacct->bmark = be64_to_cpu(
> +				nla_get_be64(tb[NFACCT_BMARK]));
> +
> +		nfacct->flags |= NFACCT_FLAG_MARK;
> +	}
>  	if (cmd & NFACCT_FLAG_BTHR && flags & NFACCT_FLAG_BTHR) {
>  		if (tb[NFACCT_BTHR])
>  			nfacct->bthr = be64_to_cpu(
> @@ -184,15 +219,28 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
>  	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
>  		pkts = atomic64_xchg(&acct->pkts, 0);
>  		bytes = atomic64_xchg(&acct->bytes, 0);
> +		acct->pmark = 0;
> +		acct->bmark = 0;
>  	} else {
>  		pkts = atomic64_read(&acct->pkts);
>  		bytes = atomic64_read(&acct->bytes);
> +		if (type == NFNL_MSG_ACCT_GET_SETMARK) {
> +			acct->pmark = pkts;
> +			acct->bmark = bytes;
> +			acct->flags |= NFACCT_FLAG_MARK;
> +		} else if (type == NFNL_MSG_ACCT_GET_CLRMARK) {
> +			acct->pmark = 0;
> +			acct->bmark = 0;
> +			acct->flags &= ~NFACCT_FLAG_MARK;
> +		}
>  	}
>  	if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) ||
>  	    nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
>  	    nla_put_be64(skb, NFACCT_BTHR, cpu_to_be64(acct->bthr)) ||
>  	    nla_put_be16(skb, NFACCT_FMT, htons(acct->fmt)) ||
>  	    nla_put_be16(skb, NFACCT_FLAGS, htons(acct->flags)) ||
> +	    nla_put_be64(skb, NFACCT_PMARK, cpu_to_be64(acct->pmark)) ||
> +	    nla_put_be64(skb, NFACCT_BMARK, cpu_to_be64(acct->bmark)) ||
>  	    nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
>  		goto nla_put_failure;
>  
> @@ -344,6 +392,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
>  	[NFACCT_FMT] = { .type = NLA_U16 },
>  	[NFACCT_FLAGS] = { .type = NLA_U16 },
>  	[NFACCT_CMD] = { .type = NLA_U16 },
> +	[NFACCT_PMARK] = { .type = NLA_U64 },
> +	[NFACCT_BMARK] = { .type = NLA_U64 },
>  };
>  
>  static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
> @@ -359,6 +409,12 @@ static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
>  	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
>  					    .attr_count = NFACCT_MAX,
>  					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_GET_SETMARK] 	= { .call = nfnl_acct_get,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
> +	[NFNL_MSG_ACCT_GET_CLRMARK] 	= { .call = nfnl_acct_get,
> +					    .attr_count = NFACCT_MAX,
> +					    .policy = nfnl_acct_policy },
>  };
>  
>  static const struct nfnetlink_subsystem nfnl_acct_subsys = {
> -- 
> 1.8.3.1
> 
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Zintakis - July 11, 2013, 6:56 p.m.
Florian Westphal wrote:
> Michael Zintakis <michael.zintakis@googlemail.com> wrote:
>> * add two variables to each nfacct object - 'pmark' and 'bmark', allowing
>> short-term traffic accounting to be implemented by placing a "mark" against
>> that object.
>>
>> This enables counting of traffic (both bytes and packets) since that mark has
>> been enabled/set, in addition to the main packet and byte counters.
> 
> And again.
> Why not simply add a 2nd accounting object, and then send traffic to
> it based on ruleset? (-m time, -m condition, -m set, etc.)?
We thought of that initially, but rejected that idea.

The marking feature is, at least in our case, primarily used to measure short-term traffic. What that means is for us to be able to gather information for traffic based on certain events or characteristics in short term (peak time or certain event during which we expect our traffic to rise/slow down dramatically for example) and produce the relevant statistics without affecting the main traffic counters.

Adding separate nfacct objects to the iptables rules (which is what I think you are suggesting above) isn't very efficient or practical, simply because: a) certain traffic is going to be spread over different iptables rules in different places (i.e. chains) and in order to add a "secondary" nfacct objects one needs to do that everywhere - in every rule - a certain "primary" nfacct object is used; b) a packet will have to pass through 2 nfacct objects in order to produce the desired effect (with marking, a packet passes and is counted simply once and that is the end of the story), not to mention that it would require nearly twice-as-much memory per accounting object, compared to if we use marking, so this is inefficient; and c) in order to add/remove nfacct objects for short-term traffic measurements like this and deploy what you are suggesting above, one needs to have full knowledge of the entire iptables structure and the whole netfilter system used, which is not always the 
case - at least not here, so what you suggest above isn't practical.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Zintakis - July 11, 2013, 6:56 p.m.
Pablo Neira Ayuso wrote:
> On Wed, Jul 10, 2013 at 07:25:22PM +0100, Michael Zintakis wrote:
>> * add two variables to each nfacct object - 'pmark' and 'bmark', allowing
>> short-term traffic accounting to be implemented by placing a "mark" against
>> that object.
>>
>> This enables counting of traffic (both bytes and packets) since that mark has
>> been enabled/set, in addition to the main packet and byte counters.
>>
>> Signed-off-by: Michael Zintakis <michael.zintakis@googlemail.com>
>> ---
>>  include/uapi/linux/netfilter/nfnetlink_acct.h |  8 +++-
>>  net/netfilter/nfnetlink_acct.c                | 56 +++++++++++++++++++++++++++
>>  2 files changed, 63 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
>> index 18cd28e..809fa35 100644
>> --- a/net/netfilter/nfnetlink_acct.c
>> +++ b/net/netfilter/nfnetlink_acct.c
>> @@ -33,6 +33,8 @@ struct nf_acct {
>>  	atomic64_t		pkts;
>>  	atomic64_t		bytes;
>>  	u64			bthr;
>> +	u64			pmark;
>> +	u64			bmark;
>>  	u16			fmt;
>>  	u16			flags;
>>  	struct list_head	head;
> 
> Oh my...
> 
> You insist on your idea of using the kernel as a database to simplify
> your user-space program. All these fields are set/unset from
> userspace, they are not altered by packets at all. This does not
> belong here.
I don't "insist" on anything Pablo and I am not using the kernel as a database either.

The way these variables are structured and information is gathered/presented is the most efficient way - both memory and performance-wise, compared to what you were suggesting we do up until now (with separate file access and so on), not to mention the security aspect of it and the fact that protecting this within the kernel is much more secure and less error-prone compared to your suggestion with file access. That, at the end of the day, is what matters here.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index e972970..87d2615 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -10,6 +10,8 @@  enum nfnl_acct_msg_types {
 	NFNL_MSG_ACCT_GET,
 	NFNL_MSG_ACCT_GET_CTRZERO,
 	NFNL_MSG_ACCT_DEL,
+	NFNL_MSG_ACCT_GET_SETMARK,
+	NFNL_MSG_ACCT_GET_CLRMARK,
 	NFNL_MSG_ACCT_MAX
 };
 
@@ -23,6 +25,8 @@  enum nfnl_acct_type {
 	NFACCT_FMT,
 	NFACCT_FLAGS,
 	NFACCT_CMD,
+	NFACCT_PMARK,
+	NFACCT_BMARK,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
@@ -30,7 +34,9 @@  enum nfnl_acct_type {
 enum nfnl_acct_flags {
 	NFACCT_FLAG_BIT_BTHR 	= 0,
 	NFACCT_FLAG_BTHR	= (1 << NFACCT_FLAG_BIT_BTHR),
-	NFACCT_FLAG_BIT_MAX	= 1,
+	NFACCT_FLAG_BIT_MARK	= 1,
+	NFACCT_FLAG_MARK	= (1 << NFACCT_FLAG_BIT_MARK),
+	NFACCT_FLAG_BIT_MAX	= 2,
 	NFACCT_FLAG_MAX		= (1 << NFACCT_FLAG_BIT_MAX),
 };
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 18cd28e..809fa35 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -33,6 +33,8 @@  struct nf_acct {
 	atomic64_t		pkts;
 	atomic64_t		bytes;
 	u64			bthr;
+	u64			pmark;
+	u64			bmark;
 	u16			fmt;
 	u16			flags;
 	struct list_head	head;
@@ -61,6 +63,10 @@  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 		cmd = be16_to_cpu(nla_get_be16(tb[NFACCT_CMD]));
 		flags = be16_to_cpu(nla_get_be16(tb[NFACCT_FLAGS]));
 
+		if (!(cmd & NFACCT_FLAG_MARK) &&
+		     (tb[NFACCT_PMARK] || tb[NFACCT_BMARK]))
+			return -EINVAL;
+
 		if (cmd & NFACCT_FLAG_BTHR &&
 		    ((flags & NFACCT_FLAG_BTHR && !tb[NFACCT_BTHR]) ||
 		     (!(flags & NFACCT_FLAG_BTHR) && tb[NFACCT_BTHR])))
@@ -114,6 +120,25 @@  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 				matching->fmt =
 				be16_to_cpu(nla_get_be16(tb[NFACCT_FMT]));
 			}
+			/* ...then set the mark flag... */
+			if (cmd & NFACCT_FLAG_MARK) {
+				if (flags & NFACCT_FLAG_MARK) {
+					matching->pmark = tb[NFACCT_PMARK] ?
+					be64_to_cpu(
+					  nla_get_be64(tb[NFACCT_PMARK])) :
+					atomic64_read(&matching->pkts);
+
+					matching->bmark = tb[NFACCT_BMARK] ?
+					be64_to_cpu(
+					  nla_get_be64(tb[NFACCT_BMARK])) :
+					atomic64_read(&matching->bytes);
+					matching->flags |= NFACCT_FLAG_MARK;
+				} else {
+					matching->pmark = 0;
+					matching->bmark = 0;
+					matching->flags &= ~NFACCT_FLAG_MARK;
+				}
+			}
 			/* ... and finally set the bytes threshold */
 			if (cmd & NFACCT_FLAG_BTHR) {
 				if (flags & NFACCT_FLAG_BTHR) {
@@ -147,6 +172,16 @@  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	if (tb[NFACCT_FMT]) {
 		nfacct->fmt = be16_to_cpu(nla_get_be16(tb[NFACCT_FMT]));
 	}
+	if (cmd & NFACCT_FLAG_MARK && flags & NFACCT_FLAG_MARK) {
+		if (tb[NFACCT_PMARK])
+			nfacct->pmark = be64_to_cpu(
+				nla_get_be64(tb[NFACCT_PMARK]));
+		if (tb[NFACCT_BMARK])
+			nfacct->bmark = be64_to_cpu(
+				nla_get_be64(tb[NFACCT_BMARK]));
+
+		nfacct->flags |= NFACCT_FLAG_MARK;
+	}
 	if (cmd & NFACCT_FLAG_BTHR && flags & NFACCT_FLAG_BTHR) {
 		if (tb[NFACCT_BTHR])
 			nfacct->bthr = be64_to_cpu(
@@ -184,15 +219,28 @@  nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
 		pkts = atomic64_xchg(&acct->pkts, 0);
 		bytes = atomic64_xchg(&acct->bytes, 0);
+		acct->pmark = 0;
+		acct->bmark = 0;
 	} else {
 		pkts = atomic64_read(&acct->pkts);
 		bytes = atomic64_read(&acct->bytes);
+		if (type == NFNL_MSG_ACCT_GET_SETMARK) {
+			acct->pmark = pkts;
+			acct->bmark = bytes;
+			acct->flags |= NFACCT_FLAG_MARK;
+		} else if (type == NFNL_MSG_ACCT_GET_CLRMARK) {
+			acct->pmark = 0;
+			acct->bmark = 0;
+			acct->flags &= ~NFACCT_FLAG_MARK;
+		}
 	}
 	if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) ||
 	    nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
 	    nla_put_be64(skb, NFACCT_BTHR, cpu_to_be64(acct->bthr)) ||
 	    nla_put_be16(skb, NFACCT_FMT, htons(acct->fmt)) ||
 	    nla_put_be16(skb, NFACCT_FLAGS, htons(acct->flags)) ||
+	    nla_put_be64(skb, NFACCT_PMARK, cpu_to_be64(acct->pmark)) ||
+	    nla_put_be64(skb, NFACCT_BMARK, cpu_to_be64(acct->bmark)) ||
 	    nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
 		goto nla_put_failure;
 
@@ -344,6 +392,8 @@  static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
 	[NFACCT_FMT] = { .type = NLA_U16 },
 	[NFACCT_FLAGS] = { .type = NLA_U16 },
 	[NFACCT_CMD] = { .type = NLA_U16 },
+	[NFACCT_PMARK] = { .type = NLA_U64 },
+	[NFACCT_BMARK] = { .type = NLA_U64 },
 };
 
 static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -359,6 +409,12 @@  static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
 	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
 					    .attr_count = NFACCT_MAX,
 					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_GET_SETMARK] 	= { .call = nfnl_acct_get,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_GET_CLRMARK] 	= { .call = nfnl_acct_get,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
 };
 
 static const struct nfnetlink_subsystem nfnl_acct_subsys = {