diff mbox

[1/1] netfilter: xtables: add quota support to nfacct

Message ID 1386780798-24374-2-git-send-email-mathieu.poirier@linaro.org
State Changes Requested
Headers show

Commit Message

Mathieu Poirier Dec. 11, 2013, 4:53 p.m. UTC
From: Mathieu Poirier <mathieu.poirier@linaro.org>

Adding packet and byte quota support.  Once a quota has been
reached a noticifaction is sent to user space that includes
the name of the accounting object along with the current byte
and packet count.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
---
 include/linux/netfilter/nfnetlink_acct.h      |  4 ++
 include/uapi/linux/netfilter/nfnetlink.h      |  2 +
 include/uapi/linux/netfilter/nfnetlink_acct.h |  1 +
 include/uapi/linux/netfilter/xt_nfacct.h      | 11 +++++
 net/netfilter/Kconfig                         |  3 +-
 net/netfilter/nfnetlink_acct.c                | 15 ++++++-
 net/netfilter/xt_nfacct.c                     | 65 ++++++++++++++++++++++++++-
 7 files changed, 97 insertions(+), 4 deletions(-)

Comments

Pablo Neira Ayuso Dec. 18, 2013, 9:53 a.m. UTC | #1
Hi Mathieu,

On Wed, Dec 11, 2013 at 09:53:18AM -0700, mathieu.poirier@linaro.org wrote:
> From: Mathieu Poirier <mathieu.poirier@linaro.org>
> 
> Adding packet and byte quota support.  Once a quota has been
> reached a noticifaction is sent to user space that includes
> the name of the accounting object along with the current byte
> and packet count.
> 
> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> ---
>  include/linux/netfilter/nfnetlink_acct.h      |  4 ++
>  include/uapi/linux/netfilter/nfnetlink.h      |  2 +
>  include/uapi/linux/netfilter/nfnetlink_acct.h |  1 +
>  include/uapi/linux/netfilter/xt_nfacct.h      | 11 +++++
>  net/netfilter/Kconfig                         |  3 +-
>  net/netfilter/nfnetlink_acct.c                | 15 ++++++-
>  net/netfilter/xt_nfacct.c                     | 65 ++++++++++++++++++++++++++-
>  7 files changed, 97 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
> index bb4bbc9..46a9dda 100644
> --- a/include/linux/netfilter/nfnetlink_acct.h
> +++ b/include/linux/netfilter/nfnetlink_acct.h
> @@ -9,5 +9,9 @@ struct nf_acct;
>  extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
>  extern void nfnl_acct_put(struct nf_acct *acct);
>  extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
> +extern u64 nfnl_acct_get_pkts(struct nf_acct *acct);
> +extern u64 nfnl_acct_get_bytes(struct nf_acct *acct);
> +extern int nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
> +				u32 type, int event, struct nf_acct *acct);
>  
>  #endif /* _NFNL_ACCT_H */
> diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
> index 4a4efaf..e7e5851 100644
> --- a/include/uapi/linux/netfilter/nfnetlink.h
> +++ b/include/uapi/linux/netfilter/nfnetlink.h
> @@ -18,6 +18,8 @@ enum nfnetlink_groups {
>  #define NFNLGRP_CONNTRACK_EXP_UPDATE	NFNLGRP_CONNTRACK_EXP_UPDATE
>  	NFNLGRP_CONNTRACK_EXP_DESTROY,
>  #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
> +	NFNLGRP_CONNTRACK_QUOTA,
> +#define NFNLGRP_CONNTRACK_QUOTA		NFNLGRP_CONNTRACK_QUOTA

Use NFNLGRP_ACCT_QUOTA, this has nothing to do with conntrack.

>  	__NFNLGRP_MAX,
>  };
>  #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
> index c7b6269..ae8ea0a 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
> @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>  	NFACCT_PKTS,
>  	NFACCT_BYTES,
>  	NFACCT_USE,
> +	NFACCT_QUOTA,
>  	__NFACCT_MAX
>  };
>  #define NFACCT_MAX (__NFACCT_MAX - 1)
> diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
> index 3e19c8a..c2e49a6 100644
> --- a/include/uapi/linux/netfilter/xt_nfacct.h
> +++ b/include/uapi/linux/netfilter/xt_nfacct.h
> @@ -3,11 +3,22 @@
>  
>  #include <linux/netfilter/nfnetlink_acct.h>
>  
> +enum xt_quota_flags {
> +	XT_QUOTA_INVERT    = 1 << 0,

I don't understand the interaction of invert and the event delivery.

> +	XT_QUOTA_PACKET    = 1 << 1,
> +	XT_QUOTA_QUOTA	   = 1 << 2,

XT_QUOTA_QUOTA ? :-)

I suggest XT_NFACCT_QUOTA_PKTS and XT_NFACCT_QUOTA_BYTES.

> +};
> +
>  struct nf_acct;
> +struct nf_acct_quota;
>  
>  struct xt_nfacct_match_info {
>  	char		name[NFACCT_NAME_MAX];
>  	struct nf_acct	*nfacct;
> +

You cannot add new field to this structure like this since it would
break backward compatibility. You have to add a v1 revision of this
structure, see net/netfilter/xt_NFQUEUE.c for instance on how to user
the iptables revision infrastructure.

> +	u_int8_t flags;

Better use __u32 for these flags to fill the hole.

> +	aligned_u64 quota;

Use __aligned_u64.

> +	struct nf_acct_quota	*priv;

This has to look like this:

/* Used internally by the kernel */
struct nf_acct_quota *priv __attribute__((aligned(8)));

But see below, I have a proposal to avoid this private area.

>  };
>  
>  #endif /* _XT_NFACCT_MATCH_H */
> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
> index 6e839b6..aa635d8 100644
> --- a/net/netfilter/Kconfig
> +++ b/net/netfilter/Kconfig
> @@ -1056,7 +1056,8 @@ config NETFILTER_XT_MATCH_NFACCT
>  	select NETFILTER_NETLINK_ACCT
>  	help
>  	  This option allows you to use the extended accounting through
> -	  nfnetlink_acct.
> +	  nfnetlink_acct.  It is also possible to add quotas at the
> +	  packet and byte level.
>  
>  	  To compile it as a module, choose M here.  If unsure, say N.
>  
> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
> index c7b6d46..20f1dad 100644
> --- a/net/netfilter/nfnetlink_acct.c
> +++ b/net/netfilter/nfnetlink_acct.c
> @@ -92,7 +92,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>  	return 0;
>  }
>  
> -static int
> +int
>  nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
>  		   int event, struct nf_acct *acct)
>  {
> @@ -134,6 +134,7 @@ nla_put_failure:
>  	nlmsg_cancel(skb, nlh);
>  	return -1;
>  }
> +EXPORT_SYMBOL_GPL(nfnl_acct_fill_info);
>  
>  static int
>  nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
> @@ -336,6 +337,18 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
>  }
>  EXPORT_SYMBOL_GPL(nfnl_acct_update);
>  
> +u64 nfnl_acct_get_pkts(struct nf_acct *nfacct)
> +{
> +	return atomic64_read(&nfacct->pkts);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_get_pkts);
> +
> +u64 nfnl_acct_get_bytes(struct nf_acct *nfacct)
> +{
> +	return atomic64_read(&nfacct->bytes);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_acct_get_bytes);

I think you can move the definition of struct nf_acct to
include/net/netfilter/nfnetlink_acct.h, so we don't need these
two exported symbols.

> +
>  static int __init nfnl_acct_init(void)
>  {
>  	int ret;
> diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
> index b3be0ef..6ed807c 100644
> --- a/net/netfilter/xt_nfacct.c
> +++ b/net/netfilter/xt_nfacct.c
> @@ -8,10 +8,14 @@
>   */
>  #include <linux/module.h>
>  #include <linux/skbuff.h>
> +#include <linux/spinlock.h>
>  
> +#include <net/netlink.h>
>  #include <linux/netfilter/x_tables.h>
>  #include <linux/netfilter/nfnetlink_acct.h>
> +#include <linux/netfilter/nfnetlink.h>
>  #include <linux/netfilter/xt_nfacct.h>
> +#include <linux/netfilter_ipv4/ipt_ULOG.h>
>  
>  MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
>  MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
> @@ -19,13 +23,62 @@ MODULE_LICENSE("GPL");
>  MODULE_ALIAS("ipt_nfacct");
>  MODULE_ALIAS("ip6t_nfacct");
>  
> +struct nf_acct_quota {
> +	spinlock_t lock;

A spinlock to protect a boolean is rather expensive.

> +	bool quota_reached; /* true if quota has been reached. */

I think you can avoid this private area (including the spinlock) by
storing a copy of the packet/byte counter before calling
nfnl_acct_update(). Then if you note that old packet/byte counter was
below quota but new is overquota, then you have to send an event, ie.

        old_pkts = atomic64_read(info->nfacct->pkts);
        new_pkts = nfnl_acct_update(skb, info->nfacct, info->flags);
        if (old_pkts < info->quota && new_pkts > info->quota)
                send event

There's still one problem with this approach since it is racy, as it
may send several events, so you can refine it with:

        if (old_pkts < info->quota && new_pkts > info->quota
            && old_pkts + 1 == new_pkts)
                send event

You will need two different functions depending on the quota mode
(packet or bytes), and you will also need to modify nfnl_acct_update()
to return the new number of packets or bytes (depending on the flags),
you can use atomic_add_return and atomic_inc_return respectively to
retrieve the new (updated) value.

> +};
> +
> +static void nfacct_quota_log(struct nf_acct *cur,
> +			const struct sk_buff *skb)

Define this nfacct_quota_event function in nfnetlink_acct and export
it, thus, you don't need to export nfnl_acct_fill_info().

> +{
> +	int ret;
> +	struct sk_buff *skb2;
> +
> +	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
                                             ^
This is called from interrupt context as well (input packets), this
has to be GFP_ATOMIC.

> +	if (skb2 == NULL) {
> +		pr_err("nfacct_quota_log: nlmsg_new failed\n");

Please, remove the error message.

> +		return;
> +	}
> +
> +	ret = nfnl_acct_fill_info(skb2, 0, 0, NFACCT_QUOTA,
> +						NFNL_MSG_ACCT_NEW, cur);
                                  ^-------------
                                       wrong coding style

> +
> +	if (ret <= 0) {
> +		kfree_skb(skb2);
> +		pr_err("nfacct_quota_log: nfnl_acct_fill_info failed\n");

remove this as well.

> +		return;
> +	}
> +
> +	netlink_broadcast(init_net.nfnl, skb2, 0,
> +					 NFNLGRP_CONNTRACK_QUOTA, GFP_ATOMIC);

wrong coding style:

        netlink_broadcast(init_net.nfnl, skb2, 0,
                          NFNLGRP_CONNTRACK_QUOTA, GFP_ATOMIC);

> +}
> +
>  static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
>  {
> -	const struct xt_nfacct_match_info *info = par->targinfo;
> +	u64 val;
> +	struct xt_nfacct_match_info *info = (void *) par->targinfo;
> +	bool ret = info->flags & XT_QUOTA_INVERT;
>  
>  	nfnl_acct_update(skb, info->nfacct);
>  
> -	return true;
> +	if (info->flags & XT_QUOTA_QUOTA) {
> +		spin_lock_bh(&info->priv->lock);
> +		val = (info->flags & XT_QUOTA_PACKET) ?
> +				nfnl_acct_get_pkts(info->nfacct) :
> +				nfnl_acct_get_bytes(info->nfacct);
> +		if (val <= info->quota) {
> +			ret = !ret;
> +			info->priv->quota_reached = false;
> +		} else {
> +			if (!info->priv->quota_reached) {
> +				info->priv->quota_reached = true;
> +				nfacct_quota_log(info->nfacct, skb);
> +			}
> +		}
> +		spin_unlock_bh(&info->priv->lock);
> +	}
> +
> +	return ret;
>  }
>  
>  static int
> @@ -40,7 +93,14 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>  			"does not exists\n", info->name);
>  		return -ENOENT;
>  	}
> +
>  	info->nfacct = nfacct;
> +
> +	if ((info->flags & XT_QUOTA_QUOTA) && !info->priv) {
> +		info->priv =  kmalloc(sizeof(struct nf_acct_quota), GFP_KERNEL);

You always have to check the return value of kmalloc, the allocation
may fail.

> +		spin_lock_init(&info->priv->lock);
> +	}
> +
>  	return 0;
>  }
>  
> @@ -50,6 +110,7 @@ nfacct_mt_destroy(const struct xt_mtdtor_param *par)
>  	const struct xt_nfacct_match_info *info = par->matchinfo;
>  
>  	nfnl_acct_put(info->nfacct);
> +	kfree(info->priv);
>  }
>  
>  static struct xt_match nfacct_mt_reg __read_mostly = {
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Dec. 19, 2013, 7:43 p.m. UTC | #2
On Thu, Dec 19, 2013 at 10:20:56AM -0700, Mathieu Poirier wrote:
>      >       NFNLGRP_CONNTRACK_EXP_DESTROY,
>      >  #define NFNLGRP_CONNTRACK_EXP_DESTROY
>      NFNLGRP_CONNTRACK_EXP_DESTROY
>      > +     NFNLGRP_CONNTRACK_QUOTA,
>      > +#define NFNLGRP_CONNTRACK_QUOTA
>      NFNLGRP_CONNTRACK_QUOTA
>      Use NFNLGRP_ACCT_QUOTA, this has nothing to do with conntrack.
> 
>    Please confirm that you suggest to create a
>    "enum nfnl_acct_groups{}"
>    in include/uapi/linux/netfilter/nfnetlink_acct.h, the same way as
>    above?

No. I just mean that you rename that since it this has nothing to do
with conntrack.

>      >       __NFNLGRP_MAX,
>      >  };
>      >  #define NFNLGRP_MAX  (__NFNLGRP_MAX - 1)
>      > diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h
>      b/include/uapi/linux/netfilter/nfnetlink_acct.h
>      > index c7b6269..ae8ea0a 100644
>      > --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
>      > +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
>      > @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>      >       NFACCT_PKTS,
>      >       NFACCT_BYTES,
>      >       NFACCT_USE,
>      > +     NFACCT_QUOTA,
>      >       __NFACCT_MAX
>      >  };
>      >  #define NFACCT_MAX (__NFACCT_MAX - 1)
>      > diff --git a/include/uapi/linux/netfilter/xt_nfacct.h
>      b/include/uapi/linux/netfilter/xt_nfacct.h
>      > index 3e19c8a..c2e49a6 100644
>      > --- a/include/uapi/linux/netfilter/xt_nfacct.h
>      > +++ b/include/uapi/linux/netfilter/xt_nfacct.h
>      > @@ -3,11 +3,22 @@
>      >
>      >  #include <linux/netfilter/nfnetlink_acct.h>
>      >
>      > +enum xt_quota_flags {
>      > +     XT_QUOTA_INVERT    = 1 << 0,
>      I don't understand the interaction of invert and the event delivery.
> 
>    It was added for flexibility [...]

I mean: This is currently broken in your patch, it is always
delivering an event when the quota is reached, no matter if invert is
set or not.

>      > +     XT_QUOTA_PACKET    = 1 << 1,
>      > +     XT_QUOTA_QUOTA     = 1 << 2,
>      XT_QUOTA_QUOTA ? :-)
> 
>    Yes - quotas are not mandatory [...]

I'm just proposing a plain rename:

s/XT_QUOTA_PACKET/XT_NFACCT_QUOTA_PKTS
s/XT_QUOTA_QUOTA/XT_NFACCT_QUOTA_BYTES

XT_QUOTA refers to the xt_quota match, which is a different iptables
match extensions.

Thinking again on the event delivery, I think it's better if the
nfacct match using the new --quota does not deliver the event itself.
You can use libnetfilter_queue instead, eg.

        iptables -I INPUT -p icmp \
                 -m nfacct icmp --quota 12345 --mode bytes --match-once \
                 -j NFLOG --nflog-prefix "icmp: " --nflog-group 34

The --once parameter tells to match only if you just crossed the quota
limit (so the event is sent once). The idea is to use nflog to deliver
the event, which is way more flexible as it includes useful
information.

P.S: please disable HTML in your emails. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Dec. 20, 2013, 8:34 p.m. UTC | #3
On 19 December 2013 12:43, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Thu, Dec 19, 2013 at 10:20:56AM -0700, Mathieu Poirier wrote:
>>      >       NFNLGRP_CONNTRACK_EXP_DESTROY,
>>      >  #define NFNLGRP_CONNTRACK_EXP_DESTROY
>>      NFNLGRP_CONNTRACK_EXP_DESTROY
>>      > +     NFNLGRP_CONNTRACK_QUOTA,
>>      > +#define NFNLGRP_CONNTRACK_QUOTA
>>      NFNLGRP_CONNTRACK_QUOTA
>>      Use NFNLGRP_ACCT_QUOTA, this has nothing to do with conntrack.
>>
>>    Please confirm that you suggest to create a
>>    "enum nfnl_acct_groups{}"
>>    in include/uapi/linux/netfilter/nfnetlink_acct.h, the same way as
>>    above?
>
> No. I just mean that you rename that since it this has nothing to do
> with conntrack.
>
>>      >       __NFNLGRP_MAX,
>>      >  };
>>      >  #define NFNLGRP_MAX  (__NFNLGRP_MAX - 1)
>>      > diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h
>>      b/include/uapi/linux/netfilter/nfnetlink_acct.h
>>      > index c7b6269..ae8ea0a 100644
>>      > --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
>>      > +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
>>      > @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>>      >       NFACCT_PKTS,
>>      >       NFACCT_BYTES,
>>      >       NFACCT_USE,
>>      > +     NFACCT_QUOTA,
>>      >       __NFACCT_MAX
>>      >  };
>>      >  #define NFACCT_MAX (__NFACCT_MAX - 1)
>>      > diff --git a/include/uapi/linux/netfilter/xt_nfacct.h
>>      b/include/uapi/linux/netfilter/xt_nfacct.h
>>      > index 3e19c8a..c2e49a6 100644
>>      > --- a/include/uapi/linux/netfilter/xt_nfacct.h
>>      > +++ b/include/uapi/linux/netfilter/xt_nfacct.h
>>      > @@ -3,11 +3,22 @@
>>      >
>>      >  #include <linux/netfilter/nfnetlink_acct.h>
>>      >
>>      > +enum xt_quota_flags {
>>      > +     XT_QUOTA_INVERT    = 1 << 0,
>>      I don't understand the interaction of invert and the event delivery.
>>
>>    It was added for flexibility [...]
>
> I mean: This is currently broken in your patch, it is always
> delivering an event when the quota is reached, no matter if invert is
> set or not.
>
>>      > +     XT_QUOTA_PACKET    = 1 << 1,
>>      > +     XT_QUOTA_QUOTA     = 1 << 2,
>>      XT_QUOTA_QUOTA ? :-)
>>
>>    Yes - quotas are not mandatory [...]
>
> I'm just proposing a plain rename:
>
> s/XT_QUOTA_PACKET/XT_NFACCT_QUOTA_PKTS
> s/XT_QUOTA_QUOTA/XT_NFACCT_QUOTA_BYTES
>
> XT_QUOTA refers to the xt_quota match, which is a different iptables
> match extensions.
>
> Thinking again on the event delivery, I think it's better if the
> nfacct match using the new --quota does not deliver the event itself.
> You can use libnetfilter_queue instead, eg.
>
>         iptables -I INPUT -p icmp \
>                  -m nfacct icmp --quota 12345 --mode bytes --match-once \
>                  -j NFLOG --nflog-prefix "icmp: " --nflog-group 34
>
> The --once parameter tells to match only if you just crossed the quota
> limit (so the event is sent once). The idea is to use nflog to deliver
> the event, which is way more flexible as it includes useful
> information.

I'm not against the idea as it is less code for me to write.  Is this
"--match-one" thing already available?  If not I'll come up with it.
Just to be clear, if "--match-one" isn't specified a message is sent
each time we try to send a packets and the quota has been reached.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Dec. 21, 2013, 8:55 a.m. UTC | #4
On Fri, Dec 20, 2013 at 01:34:00PM -0700, Mathieu Poirier wrote:
> On 19 December 2013 12:43, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
[...]
> > Thinking again on the event delivery, I think it's better if the
> > nfacct match using the new --quota does not deliver the event itself.
> > You can use libnetfilter_queue instead, eg.
> >
> >         iptables -I INPUT -p icmp \
> >                  -m nfacct icmp --quota 12345 --mode bytes --match-once \
> >                  -j NFLOG --nflog-prefix "icmp: " --nflog-group 34
> >
> > The --once parameter tells to match only if you just crossed the quota
> > limit (so the event is sent once). The idea is to use nflog to deliver
> > the event, which is way more flexible as it includes useful
> > information.
> 
> I'm not against the idea as it is less code for me to write.  Is this
> "--match-one" thing already available?  If not I'll come up with it.

The --match-once that I propose is specific to nfacct, so you need to
add a new flag to indicate this matching mode and return true only
once for that rule.

> Just to be clear, if "--match-one" isn't specified a message is sent
> each time we try to send a packets and the quota has been reached.

Exactly, in the example I provided above, if no --match-once is
specified, you will get a log message per packet over quota.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Dec. 29, 2013, 9:53 p.m. UTC | #5
On 21 December 2013 01:55, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Fri, Dec 20, 2013 at 01:34:00PM -0700, Mathieu Poirier wrote:
>> On 19 December 2013 12:43, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> [...]
>> > Thinking again on the event delivery, I think it's better if the
>> > nfacct match using the new --quota does not deliver the event itself.
>> > You can use libnetfilter_queue instead, eg.
>> >
>> >         iptables -I INPUT -p icmp \
>> >                  -m nfacct icmp --quota 12345 --mode bytes --match-once \
>> >                  -j NFLOG --nflog-prefix "icmp: " --nflog-group 34
>> >

Thinking further on this...

Unless I'm missing something the above only specifies when to log
quota transgression, hence introducing the need to write yet another
rule do explicitly deal with the packet.  My previous solution logged
quota excess _and_ dealt with the packet.

Using ' nfulnl_log_packet()' (if even possible) would seem hackish to me.

Can you suggest anything that would unite our two vision?


>> > The --once parameter tells to match only if you just crossed the quota
>> > limit (so the event is sent once). The idea is to use nflog to deliver
>> > the event, which is way more flexible as it includes useful
>> > information.
>>
>> I'm not against the idea as it is less code for me to write.  Is this
>> "--match-one" thing already available?  If not I'll come up with it.
>
> The --match-once that I propose is specific to nfacct, so you need to
> add a new flag to indicate this matching mode and return true only
> once for that rule.
>
>> Just to be clear, if "--match-one" isn't specified a message is sent
>> each time we try to send a packets and the quota has been reached.
>
> Exactly, in the example I provided above, if no --match-once is
> specified, you will get a log message per packet over quota.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Dec. 30, 2013, 5:36 p.m. UTC | #6
On Sun, Dec 29, 2013 at 02:53:15PM -0700, Mathieu Poirier wrote:
> On 21 December 2013 01:55, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > On Fri, Dec 20, 2013 at 01:34:00PM -0700, Mathieu Poirier wrote:
> >> On 19 December 2013 12:43, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > [...]
> >> > Thinking again on the event delivery, I think it's better if the
> >> > nfacct match using the new --quota does not deliver the event itself.
> >> > You can use libnetfilter_queue instead, eg.
> >> >
> >> >         iptables -I INPUT -p icmp \
> >> >                  -m nfacct icmp --quota 12345 --mode bytes --match-once \
> >> >                  -j NFLOG --nflog-prefix "icmp: " --nflog-group 34
> >> >
> 
> Thinking further on this...
> 
> Unless I'm missing something the above only specifies when to log
> quota transgression, hence introducing the need to write yet another
> rule do explicitly deal with the packet.  My previous solution logged
> quota excess _and_ dealt with the packet.

What kind of "deal with the packet" you need to make in case you
reach the quota? Please, elaborate your use case with hypothetical
(iptables) examples so I can help better.

> Using ' nfulnl_log_packet()' (if even possible) would seem hackish to me.

That don't like that choice either.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Dec. 30, 2013, 5:56 p.m. UTC | #7
On 30 December 2013 10:36, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Sun, Dec 29, 2013 at 02:53:15PM -0700, Mathieu Poirier wrote:
>> On 21 December 2013 01:55, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> > On Fri, Dec 20, 2013 at 01:34:00PM -0700, Mathieu Poirier wrote:
>> >> On 19 December 2013 12:43, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> > [...]
>> >> > Thinking again on the event delivery, I think it's better if the
>> >> > nfacct match using the new --quota does not deliver the event itself.
>> >> > You can use libnetfilter_queue instead, eg.
>> >> >
>> >> >         iptables -I INPUT -p icmp \
>> >> >                  -m nfacct icmp --quota 12345 --mode bytes --match-once \
>> >> >                  -j NFLOG --nflog-prefix "icmp: " --nflog-group 34
>> >> >
>>
>> Thinking further on this...
>>
>> Unless I'm missing something the above only specifies when to log
>> quota transgression, hence introducing the need to write yet another
>> rule do explicitly deal with the packet.  My previous solution logged
>> quota excess _and_ dealt with the packet.
>
> What kind of "deal with the packet" you need to make in case you
> reach the quota? Please, elaborate your use case with hypothetical
> (iptables) examples so I can help better.

Apologies for not expressing myself clearly.

iptables -I OUTPUT -p http \
        -m nfacct --nfacct-name icmp-limit --quota 10000 -j REJECT

Upon reaching the limit of 10000 byte of http traffic, any outgoing
http packets will be dropped and a single broadcast message will be
sent to user space.  That is because the match explicitly takes care
of sending the notification.

With your proposal:

iptables -I OUTPUT -p http \
         -m nfacct --nfacct-name http-limit --quota 10000 --match-once \
         -j NFLOG --nflog-prefix "http: " --nflog-group 34

will log the quota reached event but won't prevent further http
traffic from going out.  One could instinctively add another rule
right after the above one, something like:

iptables -I OUTPUT -p http \
         -m nfacct --nfacct-name http-limit --quota 10000 \
         -j REJECT

but that won't work either because the packet/byte could will be
incremented twice.

>
>> Using ' nfulnl_log_packet()' (if even possible) would seem hackish to me.
>
> That don't like that choice either.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Westphal Dec. 30, 2013, 9:46 p.m. UTC | #8
Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> Upon reaching the limit of 10000 byte of http traffic, any outgoing
> http packets will be dropped and a single broadcast message will be
> sent to user space.  That is because the match explicitly takes care
> of sending the notification.
> 
> With your proposal:
> 
> iptables -I OUTPUT -p http \
>          -m nfacct --nfacct-name http-limit --quota 10000 --match-once \
>          -j NFLOG --nflog-prefix "http: " --nflog-group 34
> 
> will log the quota reached event but won't prevent further http
> traffic from going out.  One could instinctively add another rule
> right after the above one, something like:
> 
> iptables -I OUTPUT -p http \
>          -m nfacct --nfacct-name http-limit --quota 10000 \
>          -j REJECT
> 
> but that won't work either because the packet/byte could will be
> incremented twice.

The usual workaround is to create custom chains to deal with this,
i.e.
iptables -N LOG_DROP_HTTP
iptables -A LOG_DROP_HTTP -j NFLOG --nflog-prefix "http: " --nflog-group 34
iptables -A LOG_DROP_HTTP -j REJECT
iptables -I OUTPUT -p http -m nfacct ... -j LOG_DROP_HTTP
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Dec. 30, 2013, 10:17 p.m. UTC | #9
Hey Florian - I think that is an acceptable compromise.  The LOG chain
and rules are extra but it is setup only once and as such scale well.

Thank you for that,
Mathieu

On 30 December 2013 14:46, Florian Westphal <fw@strlen.de> wrote:
> Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
>> Upon reaching the limit of 10000 byte of http traffic, any outgoing
>> http packets will be dropped and a single broadcast message will be
>> sent to user space.  That is because the match explicitly takes care
>> of sending the notification.
>>
>> With your proposal:
>>
>> iptables -I OUTPUT -p http \
>>          -m nfacct --nfacct-name http-limit --quota 10000 --match-once \
>>          -j NFLOG --nflog-prefix "http: " --nflog-group 34
>>
>> will log the quota reached event but won't prevent further http
>> traffic from going out.  One could instinctively add another rule
>> right after the above one, something like:
>>
>> iptables -I OUTPUT -p http \
>>          -m nfacct --nfacct-name http-limit --quota 10000 \
>>          -j REJECT
>>
>> but that won't work either because the packet/byte could will be
>> incremented twice.
>
> The usual workaround is to create custom chains to deal with this,
> i.e.
> iptables -N LOG_DROP_HTTP
> iptables -A LOG_DROP_HTTP -j NFLOG --nflog-prefix "http: " --nflog-group 34
> iptables -A LOG_DROP_HTTP -j REJECT
> iptables -I OUTPUT -p http -m nfacct ... -j LOG_DROP_HTTP
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Dec. 30, 2013, 11:14 p.m. UTC | #10
On 30 December 2013 15:17, Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> Hey Florian - I think that is an acceptable compromise.  The LOG chain
> and rules are extra but it is setup only once and as such scale well.
>
> Thank you for that,
> Mathieu
>
> On 30 December 2013 14:46, Florian Westphal <fw@strlen.de> wrote:
>> Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
>>> Upon reaching the limit of 10000 byte of http traffic, any outgoing
>>> http packets will be dropped and a single broadcast message will be
>>> sent to user space.  That is because the match explicitly takes care
>>> of sending the notification.
>>>
>>> With your proposal:
>>>
>>> iptables -I OUTPUT -p http \
>>>          -m nfacct --nfacct-name http-limit --quota 10000 --match-once \
>>>          -j NFLOG --nflog-prefix "http: " --nflog-group 34
>>>
>>> will log the quota reached event but won't prevent further http
>>> traffic from going out.  One could instinctively add another rule
>>> right after the above one, something like:
>>>
>>> iptables -I OUTPUT -p http \
>>>          -m nfacct --nfacct-name http-limit --quota 10000 \
>>>          -j REJECT
>>>
>>> but that won't work either because the packet/byte could will be
>>> incremented twice.
>>
>> The usual workaround is to create custom chains to deal with this,
>> i.e.
>> iptables -N LOG_DROP_HTTP
>> iptables -A LOG_DROP_HTTP -j NFLOG --nflog-prefix "http: " --nflog-group 34
>> iptables -A LOG_DROP_HTTP -j REJECT
>> iptables -I OUTPUT -p http -m nfacct ... -j LOG_DROP_HTTP

I may have spoken too quickly.  With this solution a log message is
sent every time a packet over quota is received, something we
definitely want to avoid.  I was able to cover that case when sending
a notification from the match function.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Westphal Dec. 30, 2013, 11:31 p.m. UTC | #11
Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> >>> will log the quota reached event but won't prevent further http
> >>> traffic from going out.  One could instinctively add another rule
> >>> right after the above one, something like:
> >>>
> >>> iptables -I OUTPUT -p http \
> >>>          -m nfacct --nfacct-name http-limit --quota 10000 \
> >>>          -j REJECT
> >>>
> >>> but that won't work either because the packet/byte could will be
> >>> incremented twice.
> >>
> >> The usual workaround is to create custom chains to deal with this,
> >> i.e.
> >> iptables -N LOG_DROP_HTTP
> >> iptables -A LOG_DROP_HTTP -j NFLOG --nflog-prefix "http: " --nflog-group 34
> >> iptables -A LOG_DROP_HTTP -j REJECT
> >> iptables -I OUTPUT -p http -m nfacct ... -j LOG_DROP_HTTP
> 
> I may have spoken too quickly.  With this solution a log message is
> sent every time a packet over quota is received, something we
> definitely want to avoid.  I was able to cover that case when sending
> a notification from the match function.

I see.  I have no nice solution for this problem.  What could be done
is adding a --check-only option to nfacct to only query but not
increment the quota counter, then you could use the 'two-rules' approach
you described earlier.  (one rule to increment quotas per-packet but only match
exactly once when the current packet brings us over the quota, another
rule to 'passively' check against the limit).

Another option would be to using connmarks or connlabels to flag when
a connection is overlimit or has already been logged.  I understand
that this would be cumbersome though (also adds the conntrack dependency).
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 3, 2014, 3:54 p.m. UTC | #12
On Mon, Dec 30, 2013 at 04:14:21PM -0700, Mathieu Poirier wrote:
> On 30 December 2013 15:17, Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> > Hey Florian - I think that is an acceptable compromise.  The LOG chain
> > and rules are extra but it is setup only once and as such scale well.
> >
> > Thank you for that,
> > Mathieu
> >
> > On 30 December 2013 14:46, Florian Westphal <fw@strlen.de> wrote:
> >> Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> >>> Upon reaching the limit of 10000 byte of http traffic, any outgoing
> >>> http packets will be dropped and a single broadcast message will be
> >>> sent to user space.  That is because the match explicitly takes care
> >>> of sending the notification.
> >>>
> >>> With your proposal:
> >>>
> >>> iptables -I OUTPUT -p http \
> >>>          -m nfacct --nfacct-name http-limit --quota 10000 --match-once \
> >>>          -j NFLOG --nflog-prefix "http: " --nflog-group 34
> >>>
> >>> will log the quota reached event but won't prevent further http
> >>> traffic from going out.  One could instinctively add another rule
> >>> right after the above one, something like:
> >>>
> >>> iptables -I OUTPUT -p http \
> >>>          -m nfacct --nfacct-name http-limit --quota 10000 \
> >>>          -j REJECT
> >>>
> >>> but that won't work either because the packet/byte could will be
> >>> incremented twice.
> >>
> >> The usual workaround is to create custom chains to deal with this,
> >> i.e.
> >> iptables -N LOG_DROP_HTTP
> >> iptables -A LOG_DROP_HTTP -j NFLOG --nflog-prefix "http: " --nflog-group 34
> >> iptables -A LOG_DROP_HTTP -j REJECT
> >> iptables -I OUTPUT -p http -m nfacct ... -j LOG_DROP_HTTP
> 
> I may have spoken too quickly.  With this solution a log message is
> sent every time a packet over quota is received, something we
> definitely want to avoid.  I was able to cover that case when sending
> a notification from the match function.

Then, keep your code to send netlink notifications, I don't find any
better solution at this moment.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Jan. 3, 2014, 8:38 p.m. UTC | #13
...

> I think you can avoid this private area (including the spinlock) by
> storing a copy of the packet/byte counter before calling
> nfnl_acct_update(). Then if you note that old packet/byte counter was
> below quota but new is overquota, then you have to send an event, ie.
>
>         old_pkts = atomic64_read(info->nfacct->pkts);
>         new_pkts = nfnl_acct_update(skb, info->nfacct, info->flags);
>         if (old_pkts < info->quota && new_pkts > info->quota)
>                 send event
>
> There's still one problem with this approach since it is racy, as it
> may send several events, so you can refine it with:
>
>         if (old_pkts < info->quota && new_pkts > info->quota
>             && old_pkts + 1 == new_pkts)
>                 send event
>
> You will need two different functions depending on the quota mode
> (packet or bytes), and you will also need to modify nfnl_acct_update()
> to return the new number of packets or bytes (depending on the flags),
> you can use atomic_add_return and atomic_inc_return respectively to
> retrieve the new (updated) value.

Now that we've dealt with the logging issue we can concentrate on the
above suggestion that tries to avoid using a spinlock.  It works well
in packet mode but not so much in byte mode.

Whether working in byte or packet mode the solution is using packets
to determine who will send the notification.  When dealing with bytes
identifying which packet made the byte count go over quota is of prime
importance.  The problem is that the upgrade of packet and bytes in
"nfnl_acct_update()" is not atomic to the caller - there is always a
possibility that the thread will lose the CPU between incrementing
packet and bytes.  This could eventually lead to erroneous arithmetic
and an imprecise recognition of a quota attainment.

No matter how I turn things around in my head we need a spinlock -
either in nfnl_acct_update() or in the match function of xt_nfacct.c.

Comments are welcomed.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso Jan. 4, 2014, 2:32 a.m. UTC | #14
On Fri, Jan 03, 2014 at 01:38:45PM -0700, Mathieu Poirier wrote:
> ...
> 
> > I think you can avoid this private area (including the spinlock) by
> > storing a copy of the packet/byte counter before calling
> > nfnl_acct_update(). Then if you note that old packet/byte counter was
> > below quota but new is overquota, then you have to send an event, ie.
> >
> >         old_pkts = atomic64_read(info->nfacct->pkts);
> >         new_pkts = nfnl_acct_update(skb, info->nfacct, info->flags);
> >         if (old_pkts < info->quota && new_pkts > info->quota)
> >                 send event
> >
> > There's still one problem with this approach since it is racy, as it
> > may send several events, so you can refine it with:
> >
> >         if (old_pkts < info->quota && new_pkts > info->quota
> >             && old_pkts + 1 == new_pkts)
> >                 send event
> >
> > You will need two different functions depending on the quota mode
> > (packet or bytes), and you will also need to modify nfnl_acct_update()
> > to return the new number of packets or bytes (depending on the flags),
> > you can use atomic_add_return and atomic_inc_return respectively to
> > retrieve the new (updated) value.
> 
> Now that we've dealt with the logging issue we can concentrate on the
> above suggestion that tries to avoid using a spinlock.  It works well
> in packet mode but not so much in byte mode.
>
> Whether working in byte or packet mode the solution is using packets
> to determine who will send the notification.  When dealing with bytes
> identifying which packet made the byte count go over quota is of prime
> importance.  The problem is that the upgrade of packet and bytes in
> "nfnl_acct_update()" is not atomic to the caller - there is always a
> possibility that the thread will lose the CPU between incrementing
> packet and bytes.  This could eventually lead to erroneous arithmetic
> and an imprecise recognition of a quota attainment.

In the input and forwarding path, packets run in bh context, so that
won't happen.  That may only happen in the output path in process
context. However, I don't understand how this may lead to "erroneous
arithmetic" in the "quota attainment" yet, you have to elaborate your
concerns/scenario more specifically.

> No matter how I turn things around in my head we need a spinlock -
> either in nfnl_acct_update() or in the match function of xt_nfacct.c.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Jan. 13, 2014, 9:50 p.m. UTC | #15
I haven't heard back from my last post in which I detailed a scenario
where a race condition may occur on the output path.  Do you need more
information?  Is there something you'd like me to be more specific on?

Thanks,
Mathieu

On 6 January 2014 09:31, Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> On 3 January 2014 19:32, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> On Fri, Jan 03, 2014 at 01:38:45PM -0700, Mathieu Poirier wrote:
>>> ...
>>>
>>> > I think you can avoid this private area (including the spinlock) by
>>> > storing a copy of the packet/byte counter before calling
>>> > nfnl_acct_update(). Then if you note that old packet/byte counter was
>>> > below quota but new is overquota, then you have to send an event, ie.
>>> >
>>> >         old_pkts = atomic64_read(info->nfacct->pkts);
>>> >         new_pkts = nfnl_acct_update(skb, info->nfacct, info->flags);
>>> >         if (old_pkts < info->quota && new_pkts > info->quota)
>>> >                 send event
>>> >
>>> > There's still one problem with this approach since it is racy, as it
>>> > may send several events, so you can refine it with:
>>> >
>>> >         if (old_pkts < info->quota && new_pkts > info->quota
>>> >             && old_pkts + 1 == new_pkts)
>>> >                 send event
>>> >
>>> > You will need two different functions depending on the quota mode
>>> > (packet or bytes), and you will also need to modify nfnl_acct_update()
>>> > to return the new number of packets or bytes (depending on the flags),
>>> > you can use atomic_add_return and atomic_inc_return respectively to
>>> > retrieve the new (updated) value.
>>>
>>> Now that we've dealt with the logging issue we can concentrate on the
>>> above suggestion that tries to avoid using a spinlock.  It works well
>>> in packet mode but not so much in byte mode.
>>>
>>> Whether working in byte or packet mode the solution is using packets
>>> to determine who will send the notification.  When dealing with bytes
>>> identifying which packet made the byte count go over quota is of prime
>>> importance.  The problem is that the upgrade of packet and bytes in
>>> "nfnl_acct_update()" is not atomic to the caller - there is always a
>>> possibility that the thread will lose the CPU between incrementing
>>> packet and bytes.  This could eventually lead to erroneous arithmetic
>>> and an imprecise recognition of a quota attainment.
>>
>> In the input and forwarding path, packets run in bh context, so that
>> won't happen.  That may only happen in the output path in process
>> context. However, I don't understand how this may lead to "erroneous
>> arithmetic" in the "quota attainment" yet, you have to elaborate your
>> concerns/scenario more specifically.
>
> Given the following snippet:
>
>  1        old_bytes = atomic64_read(nfacct->bytes);
>  2        old_packets = atomic64_read(nfacct->pkts);
>  3        new_bytes = nfnl_acct_update(skb, info->nfacct, info->flags);
>  4        new_packets = atomic64_read(nfacct->pkts);
>
> 1. I think we have a problem between line 1 and 2.  Process A could be
> interrupted after line 1, process B upgrading both nfacct->bytes and
> nfacct->pkts and Process A resuming, setting bad metrics from the
> start.  This can be remedied by reading packets, then bytes and
> packets again, repeating the process until both packet count are
> equal.
>
> 2. I think we need to modify nfnl_acct_update() to take a byte and
> packet count rather than a flags.  That way both could be upgraded
> within the function.
>
> 3. After line 3 we have our new byte count and let's assume at that
> point old_packets is 100.  At that very moment the process (let's call
> it A) is suspended and the same code gets to run on another CPU in the
> system (process B), making nfacct->pkts == 101.  When process A
> resumes it will increment nfacct->pkts, making it 102, which will not
> be equal to old_packets + 1.  Here Process B will end up sending the
> notification when in fact it should be process A.
>
> My paranoia may come from a lack of understanding of how packets are
> processed in the system.
>
>>
>>> No matter how I turn things around in my head we need a spinlock -
>>> either in nfnl_acct_update() or in the match function of xt_nfacct.c.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index bb4bbc9..46a9dda 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -9,5 +9,9 @@  struct nf_acct;
 extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
 extern void nfnl_acct_put(struct nf_acct *acct);
 extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+extern u64 nfnl_acct_get_pkts(struct nf_acct *acct);
+extern u64 nfnl_acct_get_bytes(struct nf_acct *acct);
+extern int nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
+				u32 type, int event, struct nf_acct *acct);
 
 #endif /* _NFNL_ACCT_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 4a4efaf..e7e5851 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -18,6 +18,8 @@  enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_UPDATE	NFNLGRP_CONNTRACK_EXP_UPDATE
 	NFNLGRP_CONNTRACK_EXP_DESTROY,
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
+	NFNLGRP_CONNTRACK_QUOTA,
+#define NFNLGRP_CONNTRACK_QUOTA		NFNLGRP_CONNTRACK_QUOTA
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index c7b6269..ae8ea0a 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -19,6 +19,7 @@  enum nfnl_acct_type {
 	NFACCT_PKTS,
 	NFACCT_BYTES,
 	NFACCT_USE,
+	NFACCT_QUOTA,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
index 3e19c8a..c2e49a6 100644
--- a/include/uapi/linux/netfilter/xt_nfacct.h
+++ b/include/uapi/linux/netfilter/xt_nfacct.h
@@ -3,11 +3,22 @@ 
 
 #include <linux/netfilter/nfnetlink_acct.h>
 
+enum xt_quota_flags {
+	XT_QUOTA_INVERT    = 1 << 0,
+	XT_QUOTA_PACKET    = 1 << 1,
+	XT_QUOTA_QUOTA	   = 1 << 2,
+};
+
 struct nf_acct;
+struct nf_acct_quota;
 
 struct xt_nfacct_match_info {
 	char		name[NFACCT_NAME_MAX];
 	struct nf_acct	*nfacct;
+
+	u_int8_t flags;
+	aligned_u64 quota;
+	struct nf_acct_quota	*priv;
 };
 
 #endif /* _XT_NFACCT_MATCH_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6..aa635d8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1056,7 +1056,8 @@  config NETFILTER_XT_MATCH_NFACCT
 	select NETFILTER_NETLINK_ACCT
 	help
 	  This option allows you to use the extended accounting through
-	  nfnetlink_acct.
+	  nfnetlink_acct.  It is also possible to add quotas at the
+	  packet and byte level.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d46..20f1dad 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -92,7 +92,7 @@  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	return 0;
 }
 
-static int
+int
 nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		   int event, struct nf_acct *acct)
 {
@@ -134,6 +134,7 @@  nla_put_failure:
 	nlmsg_cancel(skb, nlh);
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nfnl_acct_fill_info);
 
 static int
 nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -336,6 +337,18 @@  void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
+u64 nfnl_acct_get_pkts(struct nf_acct *nfacct)
+{
+	return atomic64_read(&nfacct->pkts);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_get_pkts);
+
+u64 nfnl_acct_get_bytes(struct nf_acct *nfacct)
+{
+	return atomic64_read(&nfacct->bytes);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_get_bytes);
+
 static int __init nfnl_acct_init(void)
 {
 	int ret;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef..6ed807c 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -8,10 +8,14 @@ 
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/spinlock.h>
 
+#include <net/netlink.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/xt_nfacct.h>
+#include <linux/netfilter_ipv4/ipt_ULOG.h>
 
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
@@ -19,13 +23,62 @@  MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_nfacct");
 MODULE_ALIAS("ip6t_nfacct");
 
+struct nf_acct_quota {
+	spinlock_t lock;
+	bool quota_reached; /* true if quota has been reached. */
+};
+
+static void nfacct_quota_log(struct nf_acct *cur,
+			const struct sk_buff *skb)
+{
+	int ret;
+	struct sk_buff *skb2;
+
+	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb2 == NULL) {
+		pr_err("nfacct_quota_log: nlmsg_new failed\n");
+		return;
+	}
+
+	ret = nfnl_acct_fill_info(skb2, 0, 0, NFACCT_QUOTA,
+						NFNL_MSG_ACCT_NEW, cur);
+
+	if (ret <= 0) {
+		kfree_skb(skb2);
+		pr_err("nfacct_quota_log: nfnl_acct_fill_info failed\n");
+		return;
+	}
+
+	netlink_broadcast(init_net.nfnl, skb2, 0,
+					 NFNLGRP_CONNTRACK_QUOTA, GFP_ATOMIC);
+}
+
 static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	const struct xt_nfacct_match_info *info = par->targinfo;
+	u64 val;
+	struct xt_nfacct_match_info *info = (void *) par->targinfo;
+	bool ret = info->flags & XT_QUOTA_INVERT;
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	return true;
+	if (info->flags & XT_QUOTA_QUOTA) {
+		spin_lock_bh(&info->priv->lock);
+		val = (info->flags & XT_QUOTA_PACKET) ?
+				nfnl_acct_get_pkts(info->nfacct) :
+				nfnl_acct_get_bytes(info->nfacct);
+		if (val <= info->quota) {
+			ret = !ret;
+			info->priv->quota_reached = false;
+		} else {
+			if (!info->priv->quota_reached) {
+				info->priv->quota_reached = true;
+				nfacct_quota_log(info->nfacct, skb);
+			}
+		}
+		spin_unlock_bh(&info->priv->lock);
+	}
+
+	return ret;
 }
 
 static int
@@ -40,7 +93,14 @@  nfacct_mt_checkentry(const struct xt_mtchk_param *par)
 			"does not exists\n", info->name);
 		return -ENOENT;
 	}
+
 	info->nfacct = nfacct;
+
+	if ((info->flags & XT_QUOTA_QUOTA) && !info->priv) {
+		info->priv =  kmalloc(sizeof(struct nf_acct_quota), GFP_KERNEL);
+		spin_lock_init(&info->priv->lock);
+	}
+
 	return 0;
 }
 
@@ -50,6 +110,7 @@  nfacct_mt_destroy(const struct xt_mtdtor_param *par)
 	const struct xt_nfacct_match_info *info = par->matchinfo;
 
 	nfnl_acct_put(info->nfacct);
+	kfree(info->priv);
 }
 
 static struct xt_match nfacct_mt_reg __read_mostly = {