Message ID | 1373480727-11254-25-git-send-email-michael.zintakis@googlemail.com |
---|---|
State | Not Applicable |
Headers | show |
Michael Zintakis <michael.zintakis@googlemail.com> wrote: > * add two variables to each nfacct object - 'pmark' and 'bmark', allowing > short-term traffic accounting to be implemented by placing a "mark" against > that object. > > This enables counting of traffic (both bytes and packets) since that mark has > been enabled/set, in addition to the main packet and byte counters. And again. Why not simply add a 2nd accounting object, and then send traffic to it based on ruleset? (-m time, -m condition, -m set, etc.)? -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jul 10, 2013 at 07:25:22PM +0100, Michael Zintakis wrote: > * add two variables to each nfacct object - 'pmark' and 'bmark', allowing > short-term traffic accounting to be implemented by placing a "mark" against > that object. > > This enables counting of traffic (both bytes and packets) since that mark has > been enabled/set, in addition to the main packet and byte counters. > > Signed-off-by: Michael Zintakis <michael.zintakis@googlemail.com> > --- > include/uapi/linux/netfilter/nfnetlink_acct.h | 8 +++- > net/netfilter/nfnetlink_acct.c | 56 +++++++++++++++++++++++++++ > 2 files changed, 63 insertions(+), 1 deletion(-) > > diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c > index 18cd28e..809fa35 100644 > --- a/net/netfilter/nfnetlink_acct.c > +++ b/net/netfilter/nfnetlink_acct.c > @@ -33,6 +33,8 @@ struct nf_acct { > atomic64_t pkts; > atomic64_t bytes; > u64 bthr; > + u64 pmark; > + u64 bmark; > u16 fmt; > u16 flags; > struct list_head head; Oh my... You insist on your idea of using the kernel as a database to simplify your user-space program. All these fields are set/unset from userspace, they are not altered by packets at all. This does not belong here. > @@ -61,6 +63,10 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, > cmd = be16_to_cpu(nla_get_be16(tb[NFACCT_CMD])); > flags = be16_to_cpu(nla_get_be16(tb[NFACCT_FLAGS])); > > + if (!(cmd & NFACCT_FLAG_MARK) && > + (tb[NFACCT_PMARK] || tb[NFACCT_BMARK])) > + return -EINVAL; > + > if (cmd & NFACCT_FLAG_BTHR && > ((flags & NFACCT_FLAG_BTHR && !tb[NFACCT_BTHR]) || > (!(flags & NFACCT_FLAG_BTHR) && tb[NFACCT_BTHR]))) > @@ -114,6 +120,25 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, > matching->fmt = > be16_to_cpu(nla_get_be16(tb[NFACCT_FMT])); > } > + /* ...then set the mark flag... */ > + if (cmd & NFACCT_FLAG_MARK) { > + if (flags & NFACCT_FLAG_MARK) { > + matching->pmark = tb[NFACCT_PMARK] ? > + be64_to_cpu( > + nla_get_be64(tb[NFACCT_PMARK])) : > + atomic64_read(&matching->pkts); > + > + matching->bmark = tb[NFACCT_BMARK] ? > + be64_to_cpu( > + nla_get_be64(tb[NFACCT_BMARK])) : > + atomic64_read(&matching->bytes); > + matching->flags |= NFACCT_FLAG_MARK; > + } else { > + matching->pmark = 0; > + matching->bmark = 0; > + matching->flags &= ~NFACCT_FLAG_MARK; > + } > + } > /* ... and finally set the bytes threshold */ > if (cmd & NFACCT_FLAG_BTHR) { > if (flags & NFACCT_FLAG_BTHR) { > @@ -147,6 +172,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, > if (tb[NFACCT_FMT]) { > nfacct->fmt = be16_to_cpu(nla_get_be16(tb[NFACCT_FMT])); > } > + if (cmd & NFACCT_FLAG_MARK && flags & NFACCT_FLAG_MARK) { > + if (tb[NFACCT_PMARK]) > + nfacct->pmark = be64_to_cpu( > + nla_get_be64(tb[NFACCT_PMARK])); > + if (tb[NFACCT_BMARK]) > + nfacct->bmark = be64_to_cpu( > + nla_get_be64(tb[NFACCT_BMARK])); > + > + nfacct->flags |= NFACCT_FLAG_MARK; > + } > if (cmd & NFACCT_FLAG_BTHR && flags & NFACCT_FLAG_BTHR) { > if (tb[NFACCT_BTHR]) > nfacct->bthr = be64_to_cpu( > @@ -184,15 +219,28 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, > if (type == NFNL_MSG_ACCT_GET_CTRZERO) { > pkts = atomic64_xchg(&acct->pkts, 0); > bytes = atomic64_xchg(&acct->bytes, 0); > + acct->pmark = 0; > + acct->bmark = 0; > } else { > pkts = atomic64_read(&acct->pkts); > bytes = atomic64_read(&acct->bytes); > + if (type == NFNL_MSG_ACCT_GET_SETMARK) { > + acct->pmark = pkts; > + acct->bmark = bytes; > + acct->flags |= NFACCT_FLAG_MARK; > + } else if (type == NFNL_MSG_ACCT_GET_CLRMARK) { > + acct->pmark = 0; > + acct->bmark = 0; > + acct->flags &= ~NFACCT_FLAG_MARK; > + } > } > if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) || > nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || > nla_put_be64(skb, NFACCT_BTHR, cpu_to_be64(acct->bthr)) || > nla_put_be16(skb, NFACCT_FMT, htons(acct->fmt)) || > nla_put_be16(skb, NFACCT_FLAGS, htons(acct->flags)) || > + nla_put_be64(skb, NFACCT_PMARK, cpu_to_be64(acct->pmark)) || > + nla_put_be64(skb, NFACCT_BMARK, cpu_to_be64(acct->bmark)) || > nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) > goto nla_put_failure; > > @@ -344,6 +392,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { > [NFACCT_FMT] = { .type = NLA_U16 }, > [NFACCT_FLAGS] = { .type = NLA_U16 }, > [NFACCT_CMD] = { .type = NLA_U16 }, > + [NFACCT_PMARK] = { .type = NLA_U64 }, > + [NFACCT_BMARK] = { .type = NLA_U64 }, > }; > > static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { > @@ -359,6 +409,12 @@ static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { > [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del, > .attr_count = NFACCT_MAX, > .policy = nfnl_acct_policy }, > + [NFNL_MSG_ACCT_GET_SETMARK] = { .call = nfnl_acct_get, > + .attr_count = NFACCT_MAX, > + .policy = nfnl_acct_policy }, > + [NFNL_MSG_ACCT_GET_CLRMARK] = { .call = nfnl_acct_get, > + .attr_count = NFACCT_MAX, > + .policy = nfnl_acct_policy }, > }; > > static const struct nfnetlink_subsystem nfnl_acct_subsys = { > -- > 1.8.3.1 > -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Florian Westphal wrote: > Michael Zintakis <michael.zintakis@googlemail.com> wrote: >> * add two variables to each nfacct object - 'pmark' and 'bmark', allowing >> short-term traffic accounting to be implemented by placing a "mark" against >> that object. >> >> This enables counting of traffic (both bytes and packets) since that mark has >> been enabled/set, in addition to the main packet and byte counters. > > And again. > Why not simply add a 2nd accounting object, and then send traffic to > it based on ruleset? (-m time, -m condition, -m set, etc.)? We thought of that initially, but rejected that idea. The marking feature is, at least in our case, primarily used to measure short-term traffic. What that means is for us to be able to gather information for traffic based on certain events or characteristics in short term (peak time or certain event during which we expect our traffic to rise/slow down dramatically for example) and produce the relevant statistics without affecting the main traffic counters. Adding separate nfacct objects to the iptables rules (which is what I think you are suggesting above) isn't very efficient or practical, simply because: a) certain traffic is going to be spread over different iptables rules in different places (i.e. chains) and in order to add a "secondary" nfacct objects one needs to do that everywhere - in every rule - a certain "primary" nfacct object is used; b) a packet will have to pass through 2 nfacct objects in order to produce the desired effect (with marking, a packet passes and is counted simply once and that is the end of the story), not to mention that it would require nearly twice-as-much memory per accounting object, compared to if we use marking, so this is inefficient; and c) in order to add/remove nfacct objects for short-term traffic measurements like this and deploy what you are suggesting above, one needs to have full knowledge of the entire iptables structure and the whole netfilter system used, which is not always the case - at least not here, so what you suggest above isn't practical. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso wrote: > On Wed, Jul 10, 2013 at 07:25:22PM +0100, Michael Zintakis wrote: >> * add two variables to each nfacct object - 'pmark' and 'bmark', allowing >> short-term traffic accounting to be implemented by placing a "mark" against >> that object. >> >> This enables counting of traffic (both bytes and packets) since that mark has >> been enabled/set, in addition to the main packet and byte counters. >> >> Signed-off-by: Michael Zintakis <michael.zintakis@googlemail.com> >> --- >> include/uapi/linux/netfilter/nfnetlink_acct.h | 8 +++- >> net/netfilter/nfnetlink_acct.c | 56 +++++++++++++++++++++++++++ >> 2 files changed, 63 insertions(+), 1 deletion(-) >> >> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c >> index 18cd28e..809fa35 100644 >> --- a/net/netfilter/nfnetlink_acct.c >> +++ b/net/netfilter/nfnetlink_acct.c >> @@ -33,6 +33,8 @@ struct nf_acct { >> atomic64_t pkts; >> atomic64_t bytes; >> u64 bthr; >> + u64 pmark; >> + u64 bmark; >> u16 fmt; >> u16 flags; >> struct list_head head; > > Oh my... > > You insist on your idea of using the kernel as a database to simplify > your user-space program. All these fields are set/unset from > userspace, they are not altered by packets at all. This does not > belong here. I don't "insist" on anything Pablo and I am not using the kernel as a database either. The way these variables are structured and information is gathered/presented is the most efficient way - both memory and performance-wise, compared to what you were suggesting we do up until now (with separate file access and so on), not to mention the security aspect of it and the fact that protecting this within the kernel is much more secure and less error-prone compared to your suggestion with file access. That, at the end of the day, is what matters here. -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h index e972970..87d2615 100644 --- a/include/uapi/linux/netfilter/nfnetlink_acct.h +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h @@ -10,6 +10,8 @@ enum nfnl_acct_msg_types { NFNL_MSG_ACCT_GET, NFNL_MSG_ACCT_GET_CTRZERO, NFNL_MSG_ACCT_DEL, + NFNL_MSG_ACCT_GET_SETMARK, + NFNL_MSG_ACCT_GET_CLRMARK, NFNL_MSG_ACCT_MAX }; @@ -23,6 +25,8 @@ enum nfnl_acct_type { NFACCT_FMT, NFACCT_FLAGS, NFACCT_CMD, + NFACCT_PMARK, + NFACCT_BMARK, __NFACCT_MAX }; #define NFACCT_MAX (__NFACCT_MAX - 1) @@ -30,7 +34,9 @@ enum nfnl_acct_type { enum nfnl_acct_flags { NFACCT_FLAG_BIT_BTHR = 0, NFACCT_FLAG_BTHR = (1 << NFACCT_FLAG_BIT_BTHR), - NFACCT_FLAG_BIT_MAX = 1, + NFACCT_FLAG_BIT_MARK = 1, + NFACCT_FLAG_MARK = (1 << NFACCT_FLAG_BIT_MARK), + NFACCT_FLAG_BIT_MAX = 2, NFACCT_FLAG_MAX = (1 << NFACCT_FLAG_BIT_MAX), }; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 18cd28e..809fa35 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -33,6 +33,8 @@ struct nf_acct { atomic64_t pkts; atomic64_t bytes; u64 bthr; + u64 pmark; + u64 bmark; u16 fmt; u16 flags; struct list_head head; @@ -61,6 +63,10 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, cmd = be16_to_cpu(nla_get_be16(tb[NFACCT_CMD])); flags = be16_to_cpu(nla_get_be16(tb[NFACCT_FLAGS])); + if (!(cmd & NFACCT_FLAG_MARK) && + (tb[NFACCT_PMARK] || tb[NFACCT_BMARK])) + return -EINVAL; + if (cmd & NFACCT_FLAG_BTHR && ((flags & NFACCT_FLAG_BTHR && !tb[NFACCT_BTHR]) || (!(flags & NFACCT_FLAG_BTHR) && tb[NFACCT_BTHR]))) @@ -114,6 +120,25 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, matching->fmt = be16_to_cpu(nla_get_be16(tb[NFACCT_FMT])); } + /* ...then set the mark flag... */ + if (cmd & NFACCT_FLAG_MARK) { + if (flags & NFACCT_FLAG_MARK) { + matching->pmark = tb[NFACCT_PMARK] ? + be64_to_cpu( + nla_get_be64(tb[NFACCT_PMARK])) : + atomic64_read(&matching->pkts); + + matching->bmark = tb[NFACCT_BMARK] ? + be64_to_cpu( + nla_get_be64(tb[NFACCT_BMARK])) : + atomic64_read(&matching->bytes); + matching->flags |= NFACCT_FLAG_MARK; + } else { + matching->pmark = 0; + matching->bmark = 0; + matching->flags &= ~NFACCT_FLAG_MARK; + } + } /* ... and finally set the bytes threshold */ if (cmd & NFACCT_FLAG_BTHR) { if (flags & NFACCT_FLAG_BTHR) { @@ -147,6 +172,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (tb[NFACCT_FMT]) { nfacct->fmt = be16_to_cpu(nla_get_be16(tb[NFACCT_FMT])); } + if (cmd & NFACCT_FLAG_MARK && flags & NFACCT_FLAG_MARK) { + if (tb[NFACCT_PMARK]) + nfacct->pmark = be64_to_cpu( + nla_get_be64(tb[NFACCT_PMARK])); + if (tb[NFACCT_BMARK]) + nfacct->bmark = be64_to_cpu( + nla_get_be64(tb[NFACCT_BMARK])); + + nfacct->flags |= NFACCT_FLAG_MARK; + } if (cmd & NFACCT_FLAG_BTHR && flags & NFACCT_FLAG_BTHR) { if (tb[NFACCT_BTHR]) nfacct->bthr = be64_to_cpu( @@ -184,15 +219,28 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, if (type == NFNL_MSG_ACCT_GET_CTRZERO) { pkts = atomic64_xchg(&acct->pkts, 0); bytes = atomic64_xchg(&acct->bytes, 0); + acct->pmark = 0; + acct->bmark = 0; } else { pkts = atomic64_read(&acct->pkts); bytes = atomic64_read(&acct->bytes); + if (type == NFNL_MSG_ACCT_GET_SETMARK) { + acct->pmark = pkts; + acct->bmark = bytes; + acct->flags |= NFACCT_FLAG_MARK; + } else if (type == NFNL_MSG_ACCT_GET_CLRMARK) { + acct->pmark = 0; + acct->bmark = 0; + acct->flags &= ~NFACCT_FLAG_MARK; + } } if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) || nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || nla_put_be64(skb, NFACCT_BTHR, cpu_to_be64(acct->bthr)) || nla_put_be16(skb, NFACCT_FMT, htons(acct->fmt)) || nla_put_be16(skb, NFACCT_FLAGS, htons(acct->flags)) || + nla_put_be64(skb, NFACCT_PMARK, cpu_to_be64(acct->pmark)) || + nla_put_be64(skb, NFACCT_BMARK, cpu_to_be64(acct->bmark)) || nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) goto nla_put_failure; @@ -344,6 +392,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_FMT] = { .type = NLA_U16 }, [NFACCT_FLAGS] = { .type = NLA_U16 }, [NFACCT_CMD] = { .type = NLA_U16 }, + [NFACCT_PMARK] = { .type = NLA_U64 }, + [NFACCT_BMARK] = { .type = NLA_U64 }, }; static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { @@ -359,6 +409,12 @@ static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del, .attr_count = NFACCT_MAX, .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET_SETMARK] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET_CLRMARK] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, }; static const struct nfnetlink_subsystem nfnl_acct_subsys = {
* add two variables to each nfacct object - 'pmark' and 'bmark', allowing short-term traffic accounting to be implemented by placing a "mark" against that object. This enables counting of traffic (both bytes and packets) since that mark has been enabled/set, in addition to the main packet and byte counters. Signed-off-by: Michael Zintakis <michael.zintakis@googlemail.com> --- include/uapi/linux/netfilter/nfnetlink_acct.h | 8 +++- net/netfilter/nfnetlink_acct.c | 56 +++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-)