diff mbox

[v5] netfilter: xtables: add quota support for nfacct

Message ID 1392740402-14952-1-git-send-email-mathieu.poirier@linaro.org
State Superseded
Headers show

Commit Message

Mathieu Poirier Feb. 18, 2014, 4:20 p.m. UTC
From: Mathieu Poirier <mathieu.poirier@linaro.org>

Adding packet and byte quota support.  Once a quota has been
reached a noticifaction is sent to user space that includes
the name of the accounting object along with the current byte
and packet count.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
---
Changes for v5:
- Removed spinlock from 'nfnl_acct_update'.
- Added accounting object store.
---
 include/linux/netfilter/nfnetlink_acct.h      |  17 ++-
 include/uapi/linux/netfilter/nfnetlink.h      |   2 +
 include/uapi/linux/netfilter/nfnetlink_acct.h |   1 +
 include/uapi/linux/netfilter/xt_nfacct.h      |  18 ++-
 net/netfilter/Kconfig                         |   3 +-
 net/netfilter/nfnetlink_acct.c                |  46 +++++--
 net/netfilter/xt_nfacct.c                     | 179 ++++++++++++++++++++++++--
 7 files changed, 238 insertions(+), 28 deletions(-)

Comments

Pablo Neira Ayuso Feb. 25, 2014, 5:54 p.m. UTC | #1
Hi Mathieu,

This is looking better, but still more comments to address, see below.

On Tue, Feb 18, 2014 at 09:20:02AM -0700, mathieu.poirier@linaro.org wrote:
> From: Mathieu Poirier <mathieu.poirier@linaro.org>
> 
> Adding packet and byte quota support.  Once a quota has been
> reached a noticifaction is sent to user space that includes
> the name of the accounting object along with the current byte
> and packet count.
> 
> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> ---
> Changes for v5:
> - Removed spinlock from 'nfnl_acct_update'.
> - Added accounting object store.
> ---
>  include/linux/netfilter/nfnetlink_acct.h      |  17 ++-
>  include/uapi/linux/netfilter/nfnetlink.h      |   2 +
>  include/uapi/linux/netfilter/nfnetlink_acct.h |   1 +
>  include/uapi/linux/netfilter/xt_nfacct.h      |  18 ++-
>  net/netfilter/Kconfig                         |   3 +-
>  net/netfilter/nfnetlink_acct.c                |  46 +++++--
>  net/netfilter/xt_nfacct.c                     | 179 ++++++++++++++++++++++++--
>  7 files changed, 238 insertions(+), 28 deletions(-)
> 
> diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
> index b2e85e5..bb04204 100644
> --- a/include/linux/netfilter/nfnetlink_acct.h
> +++ b/include/linux/netfilter/nfnetlink_acct.h
> @@ -3,11 +3,24 @@
>  
>  #include <uapi/linux/netfilter/nfnetlink_acct.h>
>  
> +struct nf_acct {
> +	atomic64_t		pkts;
> +	atomic64_t		bytes;
> +	struct list_head	head;
> +	atomic_t		refcnt;
> +	char			name[NFACCT_NAME_MAX];
> +	struct rcu_head		rcu_head;
> +};
>  
> -struct nf_acct;
> +enum nfnl_acct_udt_type {
> +	NFNL_ACCT_UDT_PACKETS,
> +	NFNL_ACCT_UDT_BYTES,
> +};

I prefer some like this instead of the one above:

enum nfacct_quota_type {
        NFACCT_QUOTA_PACKETS = 0,
        NFACCT_QUOTA_BYTES
};

>  struct nf_acct *nfnl_acct_find_get(const char *filter_name);
>  void nfnl_acct_put(struct nf_acct *acct);
> -void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
> +extern u64 nfnl_acct_update(const struct sk_buff *skb,
> +			   struct nf_acct *nfacct, int mode);
> +void nfnl_quota_event(struct nf_acct *cur);
>  
>  #endif /* _NFNL_ACCT_H */
> diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
> index 596ddd4..354a7e5 100644
> --- a/include/uapi/linux/netfilter/nfnetlink.h
> +++ b/include/uapi/linux/netfilter/nfnetlink.h
> @@ -20,6 +20,8 @@ enum nfnetlink_groups {
>  #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
>  	NFNLGRP_NFTABLES,
>  #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
> +	NFNLGRP_ACCT_QUOTA,
> +#define NFNLGRP_ACCT_QUOTA		NFNLGRP_ACCT_QUOTA
>  	__NFNLGRP_MAX,
>  };
>  #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
> index c7b6269..ae8ea0a 100644
> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
> @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>  	NFACCT_PKTS,
>  	NFACCT_BYTES,
>  	NFACCT_USE,
> +	NFACCT_QUOTA,

This is wrong. You are using this new attribute (which defines another
TLV in the payload of a message) instead of the netlink message type
in nfnl_acct_fill_info(). This needs a specific message type to
announce that the quota has been exceeded.

>  	__NFACCT_MAX
>  };
>  #define NFACCT_MAX (__NFACCT_MAX - 1)
> diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
> index 3e19c8a..7c35ce0 100644
> --- a/include/uapi/linux/netfilter/xt_nfacct.h
> +++ b/include/uapi/linux/netfilter/xt_nfacct.h
> @@ -3,11 +3,27 @@
>  
>  #include <linux/netfilter/nfnetlink_acct.h>
>  
> +enum xt_quota_flags {
> +	XT_NFACCT_QUOTA_PKTS	= 1 << 0,
> +	XT_NFACCT_QUOTA		= 1 << 1,
> +};
> +
>  struct nf_acct;
> +struct nf_acct_quota;
>  
>  struct xt_nfacct_match_info {
>  	char		name[NFACCT_NAME_MAX];
> -	struct nf_acct	*nfacct;
> +	struct nf_acct	*nfacct __aligned(8);
>  };
>  
> +struct xt_nfacct_match_info_v1 {
> +	char		name[NFACCT_NAME_MAX];
> +	struct nf_acct	*nfacct __aligned(8);

You have to move at the bottom of the structure, before the new
nf_acct_quota.

The extension from userspace sets the .userspacesize field, which
helps iptables to skip internal pointers that are set by the kernel,
If you don't fix this, iptables -D to delete a rule with nfacct will
not work.

> +	__u32 flags;
> +	__aligned_u64 quota;
> +
> +	/* used internally by kernel */
> +	struct nf_acct_quota *priv __aligned(8);
> +};
>  #endif /* _XT_NFACCT_MATCH_H */
> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
> index 748f304..ce184951 100644
> --- a/net/netfilter/Kconfig
> +++ b/net/netfilter/Kconfig
> @@ -1108,7 +1108,8 @@ config NETFILTER_XT_MATCH_NFACCT
>  	select NETFILTER_NETLINK_ACCT
>  	help
>  	  This option allows you to use the extended accounting through
> -	  nfnetlink_acct.
> +	  nfnetlink_acct.  It is also possible to add quotas at the
> +	  packet and byte level.
>  
>  	  To compile it as a module, choose M here.  If unsure, say N.
>  
> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
> index c7b6d46..5c580a7 100644
> --- a/net/netfilter/nfnetlink_acct.c
> +++ b/net/netfilter/nfnetlink_acct.c
> @@ -29,15 +29,6 @@ MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
>  
>  static LIST_HEAD(nfnl_acct_list);
>  
> -struct nf_acct {
> -	atomic64_t		pkts;
> -	atomic64_t		bytes;
> -	struct list_head	head;
> -	atomic_t		refcnt;
> -	char			name[NFACCT_NAME_MAX];
> -	struct rcu_head		rcu_head;
> -};
> -
>  static int
>  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>  	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
> @@ -92,7 +83,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>  	return 0;
>  }
>  
> -static int
> +int
>  nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
>  		   int event, struct nf_acct *acct)
>  {
> @@ -134,6 +125,7 @@ nla_put_failure:
>  	nlmsg_cancel(skb, nlh);
>  	return -1;
>  }
> +EXPORT_SYMBOL_GPL(nfnl_acct_fill_info);

I think you don't need to export this symbol.

>  static int
>  nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
> @@ -329,13 +321,41 @@ void nfnl_acct_put(struct nf_acct *acct)
>  }
>  EXPORT_SYMBOL_GPL(nfnl_acct_put);
>  
> -void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
> +u64 nfnl_acct_update(const struct sk_buff *skb,
> +		    struct nf_acct *nfacct, int mode)
>  {
> -	atomic64_inc(&nfacct->pkts);
> -	atomic64_add(skb->len, &nfacct->bytes);
> +	u64 pkts, bytes;
> +
> +	pkts = atomic64_inc_return(&nfacct->pkts);
> +	bytes = atomic64_add_return(skb->len, &nfacct->bytes);
> +
> +	return (mode == NFNL_ACCT_UDT_PACKETS) ? pkts : bytes;
>  }
>  EXPORT_SYMBOL_GPL(nfnl_acct_update);
>  
> +void
> +nfnl_quota_event(struct nf_acct *cur)
> +{
> +	int ret;
> +	struct sk_buff *skb;
> +
> +	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
> +	if (skb == NULL)
> +		return;
> +
> +	ret = nfnl_acct_fill_info(skb, 0, 0, NFACCT_QUOTA,
> +				 NFNL_MSG_ACCT_NEW, cur);
> +

Remove empty line above.

> +	if (ret <= 0) {
> +		kfree_skb(skb);
> +		return;
> +	}
> +
> +	netlink_broadcast(init_net.nfnl, skb, 0,
> +			 NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
> +}
> +EXPORT_SYMBOL_GPL(nfnl_quota_event);
> +
>  static int __init nfnl_acct_init(void)
>  {
>  	int ret;
> diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
> index b3be0ef..d1bd5ea 100644
> --- a/net/netfilter/xt_nfacct.c
> +++ b/net/netfilter/xt_nfacct.c
> @@ -9,8 +9,10 @@
>  #include <linux/module.h>
>  #include <linux/skbuff.h>
>  
> +#include <net/netlink.h>
>  #include <linux/netfilter/x_tables.h>
>  #include <linux/netfilter/nfnetlink_acct.h>
> +#include <linux/netfilter/nfnetlink.h>
>  #include <linux/netfilter/xt_nfacct.h>
>  
>  MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
> @@ -19,15 +21,62 @@ MODULE_LICENSE("GPL");
>  MODULE_ALIAS("ipt_nfacct");
>  MODULE_ALIAS("ip6t_nfacct");
>  
> +static LIST_HEAD(nfacct_list);
> +static DEFINE_MUTEX(nfacct_list_mutex);
> +
> +struct nf_acct_quota {
> +	char			name[NFACCT_NAME_MAX];
> +	int			count;
> +	u32			flags;
> +	u64			quota;
> +	struct list_head	node;
> +	atomic_t		notified;
> +};
> +
>  static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
>  {
>  	const struct xt_nfacct_match_info *info = par->targinfo;
> +	u64 __always_unused val;
> -	nfnl_acct_update(skb, info->nfacct);
> +	val = nfnl_acct_update(skb, info->nfacct, 0);

I think you can just do:

        nfnl_acct_update(skb, info->nfacct...);

and ignore the returned value in this case.

>  	return true;
>  }
>  
> +static bool
> +nfacct_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
> +{
> +	int mode;
> +	u64 val;
> +	const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
> +	struct nf_acct_quota *acct_quota = info->priv;
> +	bool ret = true;
> +
> +	mode = (acct_quota->flags & XT_NFACCT_QUOTA_PKTS) ?
> +	       NFNL_ACCT_UDT_PACKETS : NFNL_ACCT_UDT_BYTES;
> +
> +	/* upgrade accounting stats */
> +	val = nfnl_acct_update(skb, info->nfacct, mode);
> +
> +	/* no need to go further if we don't have a quota */
> +	if (!(acct_quota->flags & XT_NFACCT_QUOTA))
> +		return ret;
> +
> +	/* are we over the limit ? */
> +	if (val <= info->quota) {

You use <=, I think this should be <. Not further below you're using >=.

> +		/* reset flag in case userspace cleared the counters */
> +		atomic_set(&acct_quota->notified, 0)

This has runtime overhead and I think it's racy. I think we can avoid
this if the quota object management is moved to nfnetlink_acct (see
below for further explanation). The idea is to iterate over the list
of quota objects (to find notified flags that need to be reset) by
when NFNL_MSG_ACCT_GET_CTRZERO is called.

> +		ret = !ret;
> +	}
> +
> +	if (val >= info->quota)

You have to use brackets here:

        if (...) {
                ...
        }

> +		/* cmpxchg guarantees only one CPU can send a notification */
> +		if (atomic_cmpxchg(&acct_quota->notified, 0, 1) == 0)
> +			nfnl_quota_event(info->nfacct);
> +
> +	return ret;
> +}
> +
>  static int
>  nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>  {
> @@ -44,32 +93,140 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>  	return 0;
>  }
>  
> +static
> +struct nf_acct_quota *nfacct_find_quota(char *name)
> +{
> +	struct nf_acct_quota *curr, *match = NULL;
> +
> +	mutex_lock(&nfacct_list_mutex);
> +	list_for_each_entry(curr, &nfacct_list, node)

missing brackets here in list_for_each_entry too.

> +		if (strncmp(curr->name, name, NFACCT_NAME_MAX) == 0) {
> +			match = curr;
> +			match->count++;
> +			break;
> +		}
> +
> +	mutex_unlock(&nfacct_list_mutex);
> +
> +	return match;
> +}
> +
> +static struct  nf_acct_quota *
> +nfacct_create_quota(struct xt_nfacct_match_info_v1 *info)

Thinking of nftables and the reset notified flag above, I think it's
better if you move the quota object management to nfnetlink_acct.c by
adding new nfacct commands. The idea is to maintain a quota object
base that can be used by xt_nfacct. You have to add the following
commands:

enum nfnl_acct_msg_types {
        ...
        NFNL_MSG_ACCT_QUOTA_NEW,
        NFNL_MSG_ACCT_QUOTA_GET,
        NFNL_MSG_ACCT_QUOTA_DEL,
        NFNL_MSG_ACCT_MAX
};

Then, extend nfnl_callback to include this new commands. The idea is
to create/delete quota objects from the netlink interface, then attach
them from the iptables nfacct extension.

You won't need the mutex anymore btw, since it's already protected by
nfnl_lock.

> +{
> +	struct nf_acct_quota *acct_quota;
> +
> +	mutex_lock(&nfacct_list_mutex);
> +
> +	acct_quota = kmalloc(sizeof(struct nf_acct_quota), GFP_ATOMIC);
> +

remove empty line above.

> +	if (!acct_quota) {
> +		mutex_unlock(&nfacct_list_mutex);
> +		return NULL;
> +	}
> +
> +	strncpy(acct_quota->name, info->name, NFACCT_NAME_MAX);
> +	acct_quota->flags = info->flags;
> +	acct_quota->quota = info->quota;
> +	acct_quota->count = 1;
> +	atomic_set(&acct_quota->notified, 0);
> +	list_add_tail(&acct_quota->node, &nfacct_list);

This needs to be converted to _rcu.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier Feb. 26, 2014, 6:38 p.m. UTC | #2
On 25 February 2014 10:54, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> Hi Mathieu,
>
> This is looking better, but still more comments to address, see below.
>
> On Tue, Feb 18, 2014 at 09:20:02AM -0700, mathieu.poirier@linaro.org wrote:
>> From: Mathieu Poirier <mathieu.poirier@linaro.org>
>>
>> Adding packet and byte quota support.  Once a quota has been
>> reached a noticifaction is sent to user space that includes
>> the name of the accounting object along with the current byte
>> and packet count.
>>
>> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
>> ---
>> Changes for v5:
>> - Removed spinlock from 'nfnl_acct_update'.
>> - Added accounting object store.
>> ---
>>  include/linux/netfilter/nfnetlink_acct.h      |  17 ++-
>>  include/uapi/linux/netfilter/nfnetlink.h      |   2 +
>>  include/uapi/linux/netfilter/nfnetlink_acct.h |   1 +
>>  include/uapi/linux/netfilter/xt_nfacct.h      |  18 ++-
>>  net/netfilter/Kconfig                         |   3 +-
>>  net/netfilter/nfnetlink_acct.c                |  46 +++++--
>>  net/netfilter/xt_nfacct.c                     | 179 ++++++++++++++++++++++++--
>>  7 files changed, 238 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
>> index b2e85e5..bb04204 100644
>> --- a/include/linux/netfilter/nfnetlink_acct.h
>> +++ b/include/linux/netfilter/nfnetlink_acct.h
>> @@ -3,11 +3,24 @@
>>
>>  #include <uapi/linux/netfilter/nfnetlink_acct.h>
>>
>> +struct nf_acct {
>> +     atomic64_t              pkts;
>> +     atomic64_t              bytes;
>> +     struct list_head        head;
>> +     atomic_t                refcnt;
>> +     char                    name[NFACCT_NAME_MAX];
>> +     struct rcu_head         rcu_head;
>> +};
>>
>> -struct nf_acct;
>> +enum nfnl_acct_udt_type {
>> +     NFNL_ACCT_UDT_PACKETS,
>> +     NFNL_ACCT_UDT_BYTES,
>> +};
>
> I prefer some like this instead of the one above:
>
> enum nfacct_quota_type {
>         NFACCT_QUOTA_PACKETS = 0,
>         NFACCT_QUOTA_BYTES
> };
>
>>  struct nf_acct *nfnl_acct_find_get(const char *filter_name);
>>  void nfnl_acct_put(struct nf_acct *acct);
>> -void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
>> +extern u64 nfnl_acct_update(const struct sk_buff *skb,
>> +                        struct nf_acct *nfacct, int mode);
>> +void nfnl_quota_event(struct nf_acct *cur);
>>
>>  #endif /* _NFNL_ACCT_H */
>> diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
>> index 596ddd4..354a7e5 100644
>> --- a/include/uapi/linux/netfilter/nfnetlink.h
>> +++ b/include/uapi/linux/netfilter/nfnetlink.h
>> @@ -20,6 +20,8 @@ enum nfnetlink_groups {
>>  #define NFNLGRP_CONNTRACK_EXP_DESTROY        NFNLGRP_CONNTRACK_EXP_DESTROY
>>       NFNLGRP_NFTABLES,
>>  #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
>> +     NFNLGRP_ACCT_QUOTA,
>> +#define NFNLGRP_ACCT_QUOTA           NFNLGRP_ACCT_QUOTA
>>       __NFNLGRP_MAX,
>>  };
>>  #define NFNLGRP_MAX  (__NFNLGRP_MAX - 1)
>> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
>> index c7b6269..ae8ea0a 100644
>> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
>> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
>> @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>>       NFACCT_PKTS,
>>       NFACCT_BYTES,
>>       NFACCT_USE,
>> +     NFACCT_QUOTA,
>
> This is wrong. You are using this new attribute (which defines another
> TLV in the payload of a message) instead of the netlink message type
> in nfnl_acct_fill_info(). This needs a specific message type to
> announce that the quota has been exceeded.

If I understand you correctly you want a new entry in
"nfnl_acct_msg_types" - please confirm.

>
>>       __NFACCT_MAX
>>  };
>>  #define NFACCT_MAX (__NFACCT_MAX - 1)
>> diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
>> index 3e19c8a..7c35ce0 100644
>> --- a/include/uapi/linux/netfilter/xt_nfacct.h
>> +++ b/include/uapi/linux/netfilter/xt_nfacct.h
>> @@ -3,11 +3,27 @@
>>
>>  #include <linux/netfilter/nfnetlink_acct.h>
>>
>> +enum xt_quota_flags {
>> +     XT_NFACCT_QUOTA_PKTS    = 1 << 0,
>> +     XT_NFACCT_QUOTA         = 1 << 1,
>> +};
>> +
>>  struct nf_acct;
>> +struct nf_acct_quota;
>>
>>  struct xt_nfacct_match_info {
>>       char            name[NFACCT_NAME_MAX];
>> -     struct nf_acct  *nfacct;
>> +     struct nf_acct  *nfacct __aligned(8);
>>  };
>>
>> +struct xt_nfacct_match_info_v1 {
>> +     char            name[NFACCT_NAME_MAX];
>> +     struct nf_acct  *nfacct __aligned(8);
>
> You have to move at the bottom of the structure, before the new
> nf_acct_quota.
>
> The extension from userspace sets the .userspacesize field, which
> helps iptables to skip internal pointers that are set by the kernel,
> If you don't fix this, iptables -D to delete a rule with nfacct will
> not work.

Thanks for pointing that out - I've been experiencing problems with
"iptables -D".

>
>> +     __u32 flags;
>> +     __aligned_u64 quota;
>> +
>> +     /* used internally by kernel */
>> +     struct nf_acct_quota *priv __aligned(8);
>> +};
>>  #endif /* _XT_NFACCT_MATCH_H */
>> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
>> index 748f304..ce184951 100644
>> --- a/net/netfilter/Kconfig
>> +++ b/net/netfilter/Kconfig
>> @@ -1108,7 +1108,8 @@ config NETFILTER_XT_MATCH_NFACCT
>>       select NETFILTER_NETLINK_ACCT
>>       help
>>         This option allows you to use the extended accounting through
>> -       nfnetlink_acct.
>> +       nfnetlink_acct.  It is also possible to add quotas at the
>> +       packet and byte level.
>>
>>         To compile it as a module, choose M here.  If unsure, say N.
>>
>> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
>> index c7b6d46..5c580a7 100644
>> --- a/net/netfilter/nfnetlink_acct.c
>> +++ b/net/netfilter/nfnetlink_acct.c
>> @@ -29,15 +29,6 @@ MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
>>
>>  static LIST_HEAD(nfnl_acct_list);
>>
>> -struct nf_acct {
>> -     atomic64_t              pkts;
>> -     atomic64_t              bytes;
>> -     struct list_head        head;
>> -     atomic_t                refcnt;
>> -     char                    name[NFACCT_NAME_MAX];
>> -     struct rcu_head         rcu_head;
>> -};
>> -
>>  static int
>>  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>>            const struct nlmsghdr *nlh, const struct nlattr * const tb[])
>> @@ -92,7 +83,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>>       return 0;
>>  }
>>
>> -static int
>> +int
>>  nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
>>                  int event, struct nf_acct *acct)
>>  {
>> @@ -134,6 +125,7 @@ nla_put_failure:
>>       nlmsg_cancel(skb, nlh);
>>       return -1;
>>  }
>> +EXPORT_SYMBOL_GPL(nfnl_acct_fill_info);
>
> I think you don't need to export this symbol.
>
>>  static int
>>  nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
>> @@ -329,13 +321,41 @@ void nfnl_acct_put(struct nf_acct *acct)
>>  }
>>  EXPORT_SYMBOL_GPL(nfnl_acct_put);
>>
>> -void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
>> +u64 nfnl_acct_update(const struct sk_buff *skb,
>> +                 struct nf_acct *nfacct, int mode)
>>  {
>> -     atomic64_inc(&nfacct->pkts);
>> -     atomic64_add(skb->len, &nfacct->bytes);
>> +     u64 pkts, bytes;
>> +
>> +     pkts = atomic64_inc_return(&nfacct->pkts);
>> +     bytes = atomic64_add_return(skb->len, &nfacct->bytes);
>> +
>> +     return (mode == NFNL_ACCT_UDT_PACKETS) ? pkts : bytes;
>>  }
>>  EXPORT_SYMBOL_GPL(nfnl_acct_update);
>>
>> +void
>> +nfnl_quota_event(struct nf_acct *cur)
>> +{
>> +     int ret;
>> +     struct sk_buff *skb;
>> +
>> +     skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
>> +     if (skb == NULL)
>> +             return;
>> +
>> +     ret = nfnl_acct_fill_info(skb, 0, 0, NFACCT_QUOTA,
>> +                              NFNL_MSG_ACCT_NEW, cur);
>> +
>
> Remove empty line above.
>
>> +     if (ret <= 0) {
>> +             kfree_skb(skb);
>> +             return;
>> +     }
>> +
>> +     netlink_broadcast(init_net.nfnl, skb, 0,
>> +                      NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
>> +}
>> +EXPORT_SYMBOL_GPL(nfnl_quota_event);
>> +
>>  static int __init nfnl_acct_init(void)
>>  {
>>       int ret;
>> diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
>> index b3be0ef..d1bd5ea 100644
>> --- a/net/netfilter/xt_nfacct.c
>> +++ b/net/netfilter/xt_nfacct.c
>> @@ -9,8 +9,10 @@
>>  #include <linux/module.h>
>>  #include <linux/skbuff.h>
>>
>> +#include <net/netlink.h>
>>  #include <linux/netfilter/x_tables.h>
>>  #include <linux/netfilter/nfnetlink_acct.h>
>> +#include <linux/netfilter/nfnetlink.h>
>>  #include <linux/netfilter/xt_nfacct.h>
>>
>>  MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
>> @@ -19,15 +21,62 @@ MODULE_LICENSE("GPL");
>>  MODULE_ALIAS("ipt_nfacct");
>>  MODULE_ALIAS("ip6t_nfacct");
>>
>> +static LIST_HEAD(nfacct_list);
>> +static DEFINE_MUTEX(nfacct_list_mutex);
>> +
>> +struct nf_acct_quota {
>> +     char                    name[NFACCT_NAME_MAX];
>> +     int                     count;
>> +     u32                     flags;
>> +     u64                     quota;
>> +     struct list_head        node;
>> +     atomic_t                notified;
>> +};
>> +
>>  static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
>>  {
>>       const struct xt_nfacct_match_info *info = par->targinfo;
>> +     u64 __always_unused val;
>> -     nfnl_acct_update(skb, info->nfacct);
>> +     val = nfnl_acct_update(skb, info->nfacct, 0);
>
> I think you can just do:
>
>         nfnl_acct_update(skb, info->nfacct...);
>
> and ignore the returned value in this case.
>
>>       return true;
>>  }
>>
>> +static bool
>> +nfacct_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
>> +{
>> +     int mode;
>> +     u64 val;
>> +     const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
>> +     struct nf_acct_quota *acct_quota = info->priv;
>> +     bool ret = true;
>> +
>> +     mode = (acct_quota->flags & XT_NFACCT_QUOTA_PKTS) ?
>> +            NFNL_ACCT_UDT_PACKETS : NFNL_ACCT_UDT_BYTES;
>> +
>> +     /* upgrade accounting stats */
>> +     val = nfnl_acct_update(skb, info->nfacct, mode);
>> +
>> +     /* no need to go further if we don't have a quota */
>> +     if (!(acct_quota->flags & XT_NFACCT_QUOTA))
>> +             return ret;
>> +
>> +     /* are we over the limit ? */
>> +     if (val <= info->quota) {
>
> You use <=, I think this should be <. Not further below you're using >=.

That is the intended purpose - I take the limit to be inclusive.  If
you have a limit of 200 packets, you want the packet that makes the
count go from 199 to 200 to be allowed through.  At the same time you
want the the notification to be sent out right away since you don't
know how long it will take to receive the next packet.  Get back to me
on that if you don't agree with my logic.

>
>> +             /* reset flag in case userspace cleared the counters */
>> +             atomic_set(&acct_quota->notified, 0)
>
> This has runtime overhead and I think it's racy.

Good catch on the racy part.

 I think we can avoid
> this if the quota object management is moved to nfnetlink_acct (see
> below for further explanation). The idea is to iterate over the list
> of quota objects (to find notified flags that need to be reset) by
> when NFNL_MSG_ACCT_GET_CTRZERO is called.
>
>> +             ret = !ret;
>> +     }
>> +
>> +     if (val >= info->quota)
>
> You have to use brackets here:
>
>         if (...) {
>                 ...
>         }
>
>> +             /* cmpxchg guarantees only one CPU can send a notification */
>> +             if (atomic_cmpxchg(&acct_quota->notified, 0, 1) == 0)
>> +                     nfnl_quota_event(info->nfacct);
>> +
>> +     return ret;
>> +}
>> +
>>  static int
>>  nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>>  {
>> @@ -44,32 +93,140 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>>       return 0;
>>  }
>>
>> +static
>> +struct nf_acct_quota *nfacct_find_quota(char *name)
>> +{
>> +     struct nf_acct_quota *curr, *match = NULL;
>> +
>> +     mutex_lock(&nfacct_list_mutex);
>> +     list_for_each_entry(curr, &nfacct_list, node)
>
> missing brackets here in list_for_each_entry too.
>
>> +             if (strncmp(curr->name, name, NFACCT_NAME_MAX) == 0) {
>> +                     match = curr;
>> +                     match->count++;
>> +                     break;
>> +             }
>> +
>> +     mutex_unlock(&nfacct_list_mutex);
>> +
>> +     return match;
>> +}
>> +
>> +static struct  nf_acct_quota *
>> +nfacct_create_quota(struct xt_nfacct_match_info_v1 *info)
>
> Thinking of nftables and the reset notified flag above, I think it's
> better if you move the quota object management to nfnetlink_acct.c by
> adding new nfacct commands. The idea is to maintain a quota object
> base that can be used by xt_nfacct. You have to add the following
> commands:
>
> enum nfnl_acct_msg_types {
>         ...
>         NFNL_MSG_ACCT_QUOTA_NEW,
>         NFNL_MSG_ACCT_QUOTA_GET,
>         NFNL_MSG_ACCT_QUOTA_DEL,
>         NFNL_MSG_ACCT_MAX
> };
>
> Then, extend nfnl_callback to include this new commands. The idea is
> to create/delete quota objects from the netlink interface, then attach
> them from the iptables nfacct extension.
>
> You won't need the mutex anymore btw, since it's already protected by
> nfnl_lock.

Dealing with the overhead introduced by clearing the "notified" flag
is completely different from the management of "quota" objects.  To do
this the best solution is to introduce a callback to reset the flag
when a "nfacct" object gets reset.  That would be simple and in line
with what is used ubiquitously in the kernel.

Moving the management of "quota" objects to nfnetlink_acct is a
completely different ball game.  Not only that, it would also mandate
to make changes to nfacct, libnfnl and libnl.  It would also require
to pull a few tricks to make all that extra code compile conditionally
when XT_QUOTA is not selected, unless we want to automatically select
XT_QUOTA when NETLINK_ACCT gets enabled.  And what happens when
another quota type-of-thing comes along?  We keep stuffing "nfacct"
and "nfnetlink_acct.c" with more functionality?

To me introducing a callback for each of the operation supported by
"nfnl_acct_cb" is the way to go.  It would allow any future extension
so take action whenever something happens to an "nfacct" object.

I'll continue thinking about this but take my proposal into account
and get back to me.

>
>> +{
>> +     struct nf_acct_quota *acct_quota;
>> +
>> +     mutex_lock(&nfacct_list_mutex);
>> +
>> +     acct_quota = kmalloc(sizeof(struct nf_acct_quota), GFP_ATOMIC);
>> +
>
> remove empty line above.
>
>> +     if (!acct_quota) {
>> +             mutex_unlock(&nfacct_list_mutex);
>> +             return NULL;
>> +     }
>> +
>> +     strncpy(acct_quota->name, info->name, NFACCT_NAME_MAX);
>> +     acct_quota->flags = info->flags;
>> +     acct_quota->quota = info->quota;
>> +     acct_quota->count = 1;
>> +     atomic_set(&acct_quota->notified, 0);
>> +     list_add_tail(&acct_quota->node, &nfacct_list);
>
> This needs to be converted to _rcu.

You mean in the context that we move forward with your suggestion?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier March 10, 2014, 8:14 p.m. UTC | #3
I haven't heard back on this for 12 full days - is there anything
you'd like me to clarify?  If you are busy can you at least provide me
with a date by which I should expect an answer?

Mathieu

On 26 February 2014 11:38, Mathieu Poirier <mathieu.poirier@linaro.org> wrote:
> On 25 February 2014 10:54, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
>> Hi Mathieu,
>>
>> This is looking better, but still more comments to address, see below.
>>
>> On Tue, Feb 18, 2014 at 09:20:02AM -0700, mathieu.poirier@linaro.org wrote:
>>> From: Mathieu Poirier <mathieu.poirier@linaro.org>
>>>
>>> Adding packet and byte quota support.  Once a quota has been
>>> reached a noticifaction is sent to user space that includes
>>> the name of the accounting object along with the current byte
>>> and packet count.
>>>
>>> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
>>> ---
>>> Changes for v5:
>>> - Removed spinlock from 'nfnl_acct_update'.
>>> - Added accounting object store.
>>> ---
>>>  include/linux/netfilter/nfnetlink_acct.h      |  17 ++-
>>>  include/uapi/linux/netfilter/nfnetlink.h      |   2 +
>>>  include/uapi/linux/netfilter/nfnetlink_acct.h |   1 +
>>>  include/uapi/linux/netfilter/xt_nfacct.h      |  18 ++-
>>>  net/netfilter/Kconfig                         |   3 +-
>>>  net/netfilter/nfnetlink_acct.c                |  46 +++++--
>>>  net/netfilter/xt_nfacct.c                     | 179 ++++++++++++++++++++++++--
>>>  7 files changed, 238 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
>>> index b2e85e5..bb04204 100644
>>> --- a/include/linux/netfilter/nfnetlink_acct.h
>>> +++ b/include/linux/netfilter/nfnetlink_acct.h
>>> @@ -3,11 +3,24 @@
>>>
>>>  #include <uapi/linux/netfilter/nfnetlink_acct.h>
>>>
>>> +struct nf_acct {
>>> +     atomic64_t              pkts;
>>> +     atomic64_t              bytes;
>>> +     struct list_head        head;
>>> +     atomic_t                refcnt;
>>> +     char                    name[NFACCT_NAME_MAX];
>>> +     struct rcu_head         rcu_head;
>>> +};
>>>
>>> -struct nf_acct;
>>> +enum nfnl_acct_udt_type {
>>> +     NFNL_ACCT_UDT_PACKETS,
>>> +     NFNL_ACCT_UDT_BYTES,
>>> +};
>>
>> I prefer some like this instead of the one above:
>>
>> enum nfacct_quota_type {
>>         NFACCT_QUOTA_PACKETS = 0,
>>         NFACCT_QUOTA_BYTES
>> };
>>
>>>  struct nf_acct *nfnl_acct_find_get(const char *filter_name);
>>>  void nfnl_acct_put(struct nf_acct *acct);
>>> -void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
>>> +extern u64 nfnl_acct_update(const struct sk_buff *skb,
>>> +                        struct nf_acct *nfacct, int mode);
>>> +void nfnl_quota_event(struct nf_acct *cur);
>>>
>>>  #endif /* _NFNL_ACCT_H */
>>> diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
>>> index 596ddd4..354a7e5 100644
>>> --- a/include/uapi/linux/netfilter/nfnetlink.h
>>> +++ b/include/uapi/linux/netfilter/nfnetlink.h
>>> @@ -20,6 +20,8 @@ enum nfnetlink_groups {
>>>  #define NFNLGRP_CONNTRACK_EXP_DESTROY        NFNLGRP_CONNTRACK_EXP_DESTROY
>>>       NFNLGRP_NFTABLES,
>>>  #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
>>> +     NFNLGRP_ACCT_QUOTA,
>>> +#define NFNLGRP_ACCT_QUOTA           NFNLGRP_ACCT_QUOTA
>>>       __NFNLGRP_MAX,
>>>  };
>>>  #define NFNLGRP_MAX  (__NFNLGRP_MAX - 1)
>>> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
>>> index c7b6269..ae8ea0a 100644
>>> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
>>> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
>>> @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>>>       NFACCT_PKTS,
>>>       NFACCT_BYTES,
>>>       NFACCT_USE,
>>> +     NFACCT_QUOTA,
>>
>> This is wrong. You are using this new attribute (which defines another
>> TLV in the payload of a message) instead of the netlink message type
>> in nfnl_acct_fill_info(). This needs a specific message type to
>> announce that the quota has been exceeded.
>
> If I understand you correctly you want a new entry in
> "nfnl_acct_msg_types" - please confirm.
>
>>
>>>       __NFACCT_MAX
>>>  };
>>>  #define NFACCT_MAX (__NFACCT_MAX - 1)
>>> diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
>>> index 3e19c8a..7c35ce0 100644
>>> --- a/include/uapi/linux/netfilter/xt_nfacct.h
>>> +++ b/include/uapi/linux/netfilter/xt_nfacct.h
>>> @@ -3,11 +3,27 @@
>>>
>>>  #include <linux/netfilter/nfnetlink_acct.h>
>>>
>>> +enum xt_quota_flags {
>>> +     XT_NFACCT_QUOTA_PKTS    = 1 << 0,
>>> +     XT_NFACCT_QUOTA         = 1 << 1,
>>> +};
>>> +
>>>  struct nf_acct;
>>> +struct nf_acct_quota;
>>>
>>>  struct xt_nfacct_match_info {
>>>       char            name[NFACCT_NAME_MAX];
>>> -     struct nf_acct  *nfacct;
>>> +     struct nf_acct  *nfacct __aligned(8);
>>>  };
>>>
>>> +struct xt_nfacct_match_info_v1 {
>>> +     char            name[NFACCT_NAME_MAX];
>>> +     struct nf_acct  *nfacct __aligned(8);
>>
>> You have to move at the bottom of the structure, before the new
>> nf_acct_quota.
>>
>> The extension from userspace sets the .userspacesize field, which
>> helps iptables to skip internal pointers that are set by the kernel,
>> If you don't fix this, iptables -D to delete a rule with nfacct will
>> not work.
>
> Thanks for pointing that out - I've been experiencing problems with
> "iptables -D".
>
>>
>>> +     __u32 flags;
>>> +     __aligned_u64 quota;
>>> +
>>> +     /* used internally by kernel */
>>> +     struct nf_acct_quota *priv __aligned(8);
>>> +};
>>>  #endif /* _XT_NFACCT_MATCH_H */
>>> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
>>> index 748f304..ce184951 100644
>>> --- a/net/netfilter/Kconfig
>>> +++ b/net/netfilter/Kconfig
>>> @@ -1108,7 +1108,8 @@ config NETFILTER_XT_MATCH_NFACCT
>>>       select NETFILTER_NETLINK_ACCT
>>>       help
>>>         This option allows you to use the extended accounting through
>>> -       nfnetlink_acct.
>>> +       nfnetlink_acct.  It is also possible to add quotas at the
>>> +       packet and byte level.
>>>
>>>         To compile it as a module, choose M here.  If unsure, say N.
>>>
>>> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
>>> index c7b6d46..5c580a7 100644
>>> --- a/net/netfilter/nfnetlink_acct.c
>>> +++ b/net/netfilter/nfnetlink_acct.c
>>> @@ -29,15 +29,6 @@ MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
>>>
>>>  static LIST_HEAD(nfnl_acct_list);
>>>
>>> -struct nf_acct {
>>> -     atomic64_t              pkts;
>>> -     atomic64_t              bytes;
>>> -     struct list_head        head;
>>> -     atomic_t                refcnt;
>>> -     char                    name[NFACCT_NAME_MAX];
>>> -     struct rcu_head         rcu_head;
>>> -};
>>> -
>>>  static int
>>>  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>>>            const struct nlmsghdr *nlh, const struct nlattr * const tb[])
>>> @@ -92,7 +83,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
>>>       return 0;
>>>  }
>>>
>>> -static int
>>> +int
>>>  nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
>>>                  int event, struct nf_acct *acct)
>>>  {
>>> @@ -134,6 +125,7 @@ nla_put_failure:
>>>       nlmsg_cancel(skb, nlh);
>>>       return -1;
>>>  }
>>> +EXPORT_SYMBOL_GPL(nfnl_acct_fill_info);
>>
>> I think you don't need to export this symbol.
>>
>>>  static int
>>>  nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
>>> @@ -329,13 +321,41 @@ void nfnl_acct_put(struct nf_acct *acct)
>>>  }
>>>  EXPORT_SYMBOL_GPL(nfnl_acct_put);
>>>
>>> -void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
>>> +u64 nfnl_acct_update(const struct sk_buff *skb,
>>> +                 struct nf_acct *nfacct, int mode)
>>>  {
>>> -     atomic64_inc(&nfacct->pkts);
>>> -     atomic64_add(skb->len, &nfacct->bytes);
>>> +     u64 pkts, bytes;
>>> +
>>> +     pkts = atomic64_inc_return(&nfacct->pkts);
>>> +     bytes = atomic64_add_return(skb->len, &nfacct->bytes);
>>> +
>>> +     return (mode == NFNL_ACCT_UDT_PACKETS) ? pkts : bytes;
>>>  }
>>>  EXPORT_SYMBOL_GPL(nfnl_acct_update);
>>>
>>> +void
>>> +nfnl_quota_event(struct nf_acct *cur)
>>> +{
>>> +     int ret;
>>> +     struct sk_buff *skb;
>>> +
>>> +     skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
>>> +     if (skb == NULL)
>>> +             return;
>>> +
>>> +     ret = nfnl_acct_fill_info(skb, 0, 0, NFACCT_QUOTA,
>>> +                              NFNL_MSG_ACCT_NEW, cur);
>>> +
>>
>> Remove empty line above.
>>
>>> +     if (ret <= 0) {
>>> +             kfree_skb(skb);
>>> +             return;
>>> +     }
>>> +
>>> +     netlink_broadcast(init_net.nfnl, skb, 0,
>>> +                      NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
>>> +}
>>> +EXPORT_SYMBOL_GPL(nfnl_quota_event);
>>> +
>>>  static int __init nfnl_acct_init(void)
>>>  {
>>>       int ret;
>>> diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
>>> index b3be0ef..d1bd5ea 100644
>>> --- a/net/netfilter/xt_nfacct.c
>>> +++ b/net/netfilter/xt_nfacct.c
>>> @@ -9,8 +9,10 @@
>>>  #include <linux/module.h>
>>>  #include <linux/skbuff.h>
>>>
>>> +#include <net/netlink.h>
>>>  #include <linux/netfilter/x_tables.h>
>>>  #include <linux/netfilter/nfnetlink_acct.h>
>>> +#include <linux/netfilter/nfnetlink.h>
>>>  #include <linux/netfilter/xt_nfacct.h>
>>>
>>>  MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
>>> @@ -19,15 +21,62 @@ MODULE_LICENSE("GPL");
>>>  MODULE_ALIAS("ipt_nfacct");
>>>  MODULE_ALIAS("ip6t_nfacct");
>>>
>>> +static LIST_HEAD(nfacct_list);
>>> +static DEFINE_MUTEX(nfacct_list_mutex);
>>> +
>>> +struct nf_acct_quota {
>>> +     char                    name[NFACCT_NAME_MAX];
>>> +     int                     count;
>>> +     u32                     flags;
>>> +     u64                     quota;
>>> +     struct list_head        node;
>>> +     atomic_t                notified;
>>> +};
>>> +
>>>  static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
>>>  {
>>>       const struct xt_nfacct_match_info *info = par->targinfo;
>>> +     u64 __always_unused val;
>>> -     nfnl_acct_update(skb, info->nfacct);
>>> +     val = nfnl_acct_update(skb, info->nfacct, 0);
>>
>> I think you can just do:
>>
>>         nfnl_acct_update(skb, info->nfacct...);
>>
>> and ignore the returned value in this case.
>>
>>>       return true;
>>>  }
>>>
>>> +static bool
>>> +nfacct_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
>>> +{
>>> +     int mode;
>>> +     u64 val;
>>> +     const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
>>> +     struct nf_acct_quota *acct_quota = info->priv;
>>> +     bool ret = true;
>>> +
>>> +     mode = (acct_quota->flags & XT_NFACCT_QUOTA_PKTS) ?
>>> +            NFNL_ACCT_UDT_PACKETS : NFNL_ACCT_UDT_BYTES;
>>> +
>>> +     /* upgrade accounting stats */
>>> +     val = nfnl_acct_update(skb, info->nfacct, mode);
>>> +
>>> +     /* no need to go further if we don't have a quota */
>>> +     if (!(acct_quota->flags & XT_NFACCT_QUOTA))
>>> +             return ret;
>>> +
>>> +     /* are we over the limit ? */
>>> +     if (val <= info->quota) {
>>
>> You use <=, I think this should be <. Not further below you're using >=.
>
> That is the intended purpose - I take the limit to be inclusive.  If
> you have a limit of 200 packets, you want the packet that makes the
> count go from 199 to 200 to be allowed through.  At the same time you
> want the the notification to be sent out right away since you don't
> know how long it will take to receive the next packet.  Get back to me
> on that if you don't agree with my logic.
>
>>
>>> +             /* reset flag in case userspace cleared the counters */
>>> +             atomic_set(&acct_quota->notified, 0)
>>
>> This has runtime overhead and I think it's racy.
>
> Good catch on the racy part.
>
>  I think we can avoid
>> this if the quota object management is moved to nfnetlink_acct (see
>> below for further explanation). The idea is to iterate over the list
>> of quota objects (to find notified flags that need to be reset) by
>> when NFNL_MSG_ACCT_GET_CTRZERO is called.
>>
>>> +             ret = !ret;
>>> +     }
>>> +
>>> +     if (val >= info->quota)
>>
>> You have to use brackets here:
>>
>>         if (...) {
>>                 ...
>>         }
>>
>>> +             /* cmpxchg guarantees only one CPU can send a notification */
>>> +             if (atomic_cmpxchg(&acct_quota->notified, 0, 1) == 0)
>>> +                     nfnl_quota_event(info->nfacct);
>>> +
>>> +     return ret;
>>> +}
>>> +
>>>  static int
>>>  nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>>>  {
>>> @@ -44,32 +93,140 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
>>>       return 0;
>>>  }
>>>
>>> +static
>>> +struct nf_acct_quota *nfacct_find_quota(char *name)
>>> +{
>>> +     struct nf_acct_quota *curr, *match = NULL;
>>> +
>>> +     mutex_lock(&nfacct_list_mutex);
>>> +     list_for_each_entry(curr, &nfacct_list, node)
>>
>> missing brackets here in list_for_each_entry too.
>>
>>> +             if (strncmp(curr->name, name, NFACCT_NAME_MAX) == 0) {
>>> +                     match = curr;
>>> +                     match->count++;
>>> +                     break;
>>> +             }
>>> +
>>> +     mutex_unlock(&nfacct_list_mutex);
>>> +
>>> +     return match;
>>> +}
>>> +
>>> +static struct  nf_acct_quota *
>>> +nfacct_create_quota(struct xt_nfacct_match_info_v1 *info)
>>
>> Thinking of nftables and the reset notified flag above, I think it's
>> better if you move the quota object management to nfnetlink_acct.c by
>> adding new nfacct commands. The idea is to maintain a quota object
>> base that can be used by xt_nfacct. You have to add the following
>> commands:
>>
>> enum nfnl_acct_msg_types {
>>         ...
>>         NFNL_MSG_ACCT_QUOTA_NEW,
>>         NFNL_MSG_ACCT_QUOTA_GET,
>>         NFNL_MSG_ACCT_QUOTA_DEL,
>>         NFNL_MSG_ACCT_MAX
>> };
>>
>> Then, extend nfnl_callback to include this new commands. The idea is
>> to create/delete quota objects from the netlink interface, then attach
>> them from the iptables nfacct extension.
>>
>> You won't need the mutex anymore btw, since it's already protected by
>> nfnl_lock.
>
> Dealing with the overhead introduced by clearing the "notified" flag
> is completely different from the management of "quota" objects.  To do
> this the best solution is to introduce a callback to reset the flag
> when a "nfacct" object gets reset.  That would be simple and in line
> with what is used ubiquitously in the kernel.
>
> Moving the management of "quota" objects to nfnetlink_acct is a
> completely different ball game.  Not only that, it would also mandate
> to make changes to nfacct, libnfnl and libnl.  It would also require
> to pull a few tricks to make all that extra code compile conditionally
> when XT_QUOTA is not selected, unless we want to automatically select
> XT_QUOTA when NETLINK_ACCT gets enabled.  And what happens when
> another quota type-of-thing comes along?  We keep stuffing "nfacct"
> and "nfnetlink_acct.c" with more functionality?
>
> To me introducing a callback for each of the operation supported by
> "nfnl_acct_cb" is the way to go.  It would allow any future extension
> so take action whenever something happens to an "nfacct" object.
>
> I'll continue thinking about this but take my proposal into account
> and get back to me.
>
>>
>>> +{
>>> +     struct nf_acct_quota *acct_quota;
>>> +
>>> +     mutex_lock(&nfacct_list_mutex);
>>> +
>>> +     acct_quota = kmalloc(sizeof(struct nf_acct_quota), GFP_ATOMIC);
>>> +
>>
>> remove empty line above.
>>
>>> +     if (!acct_quota) {
>>> +             mutex_unlock(&nfacct_list_mutex);
>>> +             return NULL;
>>> +     }
>>> +
>>> +     strncpy(acct_quota->name, info->name, NFACCT_NAME_MAX);
>>> +     acct_quota->flags = info->flags;
>>> +     acct_quota->quota = info->quota;
>>> +     acct_quota->count = 1;
>>> +     atomic_set(&acct_quota->notified, 0);
>>> +     list_add_tail(&acct_quota->node, &nfacct_list);
>>
>> This needs to be converted to _rcu.
>
> You mean in the context that we move forward with your suggestion?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso March 10, 2014, 9:02 p.m. UTC | #4
On Wed, Feb 26, 2014 at 11:38:19AM -0700, Mathieu Poirier wrote:
> >> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
> >> index c7b6269..ae8ea0a 100644
> >> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
> >> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
> >> @@ -19,6 +19,7 @@ enum nfnl_acct_type {
> >>       NFACCT_PKTS,
> >>       NFACCT_BYTES,
> >>       NFACCT_USE,
> >> +     NFACCT_QUOTA,
> >
> > This is wrong. You are using this new attribute (which defines another
> > TLV in the payload of a message) instead of the netlink message type
> > in nfnl_acct_fill_info(). This needs a specific message type to
> > announce that the quota has been exceeded.
> 
> If I understand you correctly you want a new entry in
> "nfnl_acct_msg_types" - please confirm.

Yes. There is no other way to make it.

You are not adding an attribute to the payload of the netlink message
but a new message type...

> >>       __NFACCT_MAX
> >>  };
> >>  #define NFACCT_MAX (__NFACCT_MAX - 1)
> >> diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
> >> index 3e19c8a..7c35ce0 100644
> >> --- a/include/uapi/linux/netfilter/xt_nfacct.h
> >> +++ b/include/uapi/linux/netfilter/xt_nfacct.h
> >> @@ -3,11 +3,27 @@
> >>
> >>  #include <linux/netfilter/nfnetlink_acct.h>
> >>
> >> +enum xt_quota_flags {
> >> +     XT_NFACCT_QUOTA_PKTS    = 1 << 0,
> >> +     XT_NFACCT_QUOTA         = 1 << 1,
> >> +};
> >> +
> >>  struct nf_acct;
> >> +struct nf_acct_quota;
> >>
> >>  struct xt_nfacct_match_info {
> >>       char            name[NFACCT_NAME_MAX];
> >> -     struct nf_acct  *nfacct;
> >> +     struct nf_acct  *nfacct __aligned(8);
> >>  };
> >>
> >> +struct xt_nfacct_match_info_v1 {
> >> +     char            name[NFACCT_NAME_MAX];
> >> +     struct nf_acct  *nfacct __aligned(8);
> >
> > You have to move at the bottom of the structure, before the new
> > nf_acct_quota.
> >
> > The extension from userspace sets the .userspacesize field, which
> > helps iptables to skip internal pointers that are set by the kernel,
> > If you don't fix this, iptables -D to delete a rule with nfacct will
> > not work.
> 
> Thanks for pointing that out - I've been experiencing problems with
> "iptables -D".

BTW, don't change the layout of xt_nfacct_match_info, leave it as is.
Only use struct nf_acct  *nfacct __aligned(8); in struct
xt_nfacct_match_info_v1, OK?

> >> +static bool
> >> +nfacct_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
> >> +{
> >> +     int mode;
> >> +     u64 val;
> >> +     const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
> >> +     struct nf_acct_quota *acct_quota = info->priv;
> >> +     bool ret = true;
> >> +
> >> +     mode = (acct_quota->flags & XT_NFACCT_QUOTA_PKTS) ?
> >> +            NFNL_ACCT_UDT_PACKETS : NFNL_ACCT_UDT_BYTES;
> >> +
> >> +     /* upgrade accounting stats */
> >> +     val = nfnl_acct_update(skb, info->nfacct, mode);
> >> +
> >> +     /* no need to go further if we don't have a quota */
> >> +     if (!(acct_quota->flags & XT_NFACCT_QUOTA))
> >> +             return ret;
> >> +
> >> +     /* are we over the limit ? */
> >> +     if (val <= info->quota) {
> >
> > You use <=, I think this should be <. Not further below you're using >=.
> 
> That is the intended purpose - I take the limit to be inclusive.  If
> you have a limit of 200 packets, you want the packet that makes the
> count go from 199 to 200 to be allowed through.  At the same time you
> want the the notification to be sent out right away since you don't
> know how long it will take to receive the next packet.  Get back to me
> on that if you don't agree with my logic.

OK, makes sense.

> >> +             /* reset flag in case userspace cleared the counters */
> >> +             atomic_set(&acct_quota->notified, 0)
> >
> > This has runtime overhead and I think it's racy.
> 
> Good catch on the racy part.
> 
> > I think we can avoid
> > this if the quota object management is moved to nfnetlink_acct (see
> > below for further explanation). The idea is to iterate over the list
> > of quota objects (to find notified flags that need to be reset) by
> > when NFNL_MSG_ACCT_GET_CTRZERO is called.
> >
[...]
> >> +static struct  nf_acct_quota *
> >> +nfacct_create_quota(struct xt_nfacct_match_info_v1 *info)
> >
> > Thinking of nftables and the reset notified flag above, I think it's
> > better if you move the quota object management to nfnetlink_acct.c by
> > adding new nfacct commands. The idea is to maintain a quota object
> > base that can be used by xt_nfacct. You have to add the following
> > commands:
> >
> > enum nfnl_acct_msg_types {
> >         ...
> >         NFNL_MSG_ACCT_QUOTA_NEW,
> >         NFNL_MSG_ACCT_QUOTA_GET,
> >         NFNL_MSG_ACCT_QUOTA_DEL,
> >         NFNL_MSG_ACCT_MAX
> > };
> >
> > Then, extend nfnl_callback to include this new commands. The idea is
> > to create/delete quota objects from the netlink interface, then attach
> > them from the iptables nfacct extension.
> >
> > You won't need the mutex anymore btw, since it's already protected by
> > nfnl_lock.
> 
> Dealing with the overhead introduced by clearing the "notified" flag
> is completely different from the management of "quota" objects.  To do
> this the best solution is to introduce a callback to reset the flag
> when a "nfacct" object gets reset.  That would be simple and in line
> with what is used ubiquitously in the kernel.
>
> Moving the management of "quota" objects to nfnetlink_acct is a
> completely different ball game.  Not only that, it would also mandate
> to make changes to nfacct, libnfnl and libnl.  It would also require
> to pull a few tricks to make all that extra code compile conditionally
> when XT_QUOTA is not selected, unless we want to automatically select
> XT_QUOTA when NETLINK_ACCT gets enabled.  And what happens when
> another quota type-of-thing comes along?  We keep stuffing "nfacct"
> and "nfnetlink_acct.c" with more functionality?

This is the most generic thing to make it.

How the user will inspect if quota has been already fulfilled? And how
will the user reset it in case it needs it? Or simply upgrade the
quota without reloading the rule? You have to provide a netlink
interface if you want to implement this in a nice way.  Otherwise, I
bet that someone will come with yet another hackish /proc interface
for this, no please. Or worst, people will code perl scripts to parse
the iptables -L -n output.

Of course, it's more code to nfnetlink_acct, the libraries and the
utility, but this will be pretty generic and we'll provide a nice
interface.

And again, thinking of nftables, if we implement the quota management
code in nfnetlink_acct, adding support for nfacct and quota will just
require very little code since we'll reuse most of the common
infrastructure.

I really think the quota management code belongs nfnetlink_acct, not
xt_nfacct.

> To me introducing a callback for each of the operation supported by
> "nfnl_acct_cb" is the way to go.  It would allow any future extension
> so take action whenever something happens to an "nfacct" object.
>
> I'll continue thinking about this but take my proposal into account
> and get back to me.
> 
> >
> >> +{
> >> +     struct nf_acct_quota *acct_quota;
> >> +
> >> +     mutex_lock(&nfacct_list_mutex);
> >> +
> >> +     acct_quota = kmalloc(sizeof(struct nf_acct_quota), GFP_ATOMIC);
> >> +
> >
> > remove empty line above.
> >
> >> +     if (!acct_quota) {
> >> +             mutex_unlock(&nfacct_list_mutex);
> >> +             return NULL;
> >> +     }
> >> +
> >> +     strncpy(acct_quota->name, info->name, NFACCT_NAME_MAX);
> >> +     acct_quota->flags = info->flags;
> >> +     acct_quota->quota = info->quota;
> >> +     acct_quota->count = 1;
> >> +     atomic_set(&acct_quota->notified, 0);
> >> +     list_add_tail(&acct_quota->node, &nfacct_list);
> >
> > This needs to be converted to _rcu.
> 
> You mean in the context that we move forward with your suggestion?

Right.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mathieu Poirier March 11, 2014, 3:24 p.m. UTC | #5
On 10 March 2014 15:02, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Wed, Feb 26, 2014 at 11:38:19AM -0700, Mathieu Poirier wrote:
>> >> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
>> >> index c7b6269..ae8ea0a 100644
>> >> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h
>> >> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
>> >> @@ -19,6 +19,7 @@ enum nfnl_acct_type {
>> >>       NFACCT_PKTS,
>> >>       NFACCT_BYTES,
>> >>       NFACCT_USE,
>> >> +     NFACCT_QUOTA,
>> >
>> > This is wrong. You are using this new attribute (which defines another
>> > TLV in the payload of a message) instead of the netlink message type
>> > in nfnl_acct_fill_info(). This needs a specific message type to
>> > announce that the quota has been exceeded.
>>
>> If I understand you correctly you want a new entry in
>> "nfnl_acct_msg_types" - please confirm.
>
> Yes. There is no other way to make it.
>
> You are not adding an attribute to the payload of the netlink message
> but a new message type...

ACK

>
>> >>       __NFACCT_MAX
>> >>  };
>> >>  #define NFACCT_MAX (__NFACCT_MAX - 1)
>> >> diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
>> >> index 3e19c8a..7c35ce0 100644
>> >> --- a/include/uapi/linux/netfilter/xt_nfacct.h
>> >> +++ b/include/uapi/linux/netfilter/xt_nfacct.h
>> >> @@ -3,11 +3,27 @@
>> >>
>> >>  #include <linux/netfilter/nfnetlink_acct.h>
>> >>
>> >> +enum xt_quota_flags {
>> >> +     XT_NFACCT_QUOTA_PKTS    = 1 << 0,
>> >> +     XT_NFACCT_QUOTA         = 1 << 1,
>> >> +};
>> >> +
>> >>  struct nf_acct;
>> >> +struct nf_acct_quota;
>> >>
>> >>  struct xt_nfacct_match_info {
>> >>       char            name[NFACCT_NAME_MAX];
>> >> -     struct nf_acct  *nfacct;
>> >> +     struct nf_acct  *nfacct __aligned(8);
>> >>  };
>> >>
>> >> +struct xt_nfacct_match_info_v1 {
>> >> +     char            name[NFACCT_NAME_MAX];
>> >> +     struct nf_acct  *nfacct __aligned(8);
>> >
>> > You have to move at the bottom of the structure, before the new
>> > nf_acct_quota.
>> >
>> > The extension from userspace sets the .userspacesize field, which
>> > helps iptables to skip internal pointers that are set by the kernel,
>> > If you don't fix this, iptables -D to delete a rule with nfacct will
>> > not work.
>>
>> Thanks for pointing that out - I've been experiencing problems with
>> "iptables -D".
>
> BTW, don't change the layout of xt_nfacct_match_info, leave it as is.
> Only use struct nf_acct  *nfacct __aligned(8); in struct
> xt_nfacct_match_info_v1, OK?

Ok, but why?  There is obviously a reason and I don't understand it.

>
>> >> +static bool
>> >> +nfacct_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
>> >> +{
>> >> +     int mode;
>> >> +     u64 val;
>> >> +     const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
>> >> +     struct nf_acct_quota *acct_quota = info->priv;
>> >> +     bool ret = true;
>> >> +
>> >> +     mode = (acct_quota->flags & XT_NFACCT_QUOTA_PKTS) ?
>> >> +            NFNL_ACCT_UDT_PACKETS : NFNL_ACCT_UDT_BYTES;
>> >> +
>> >> +     /* upgrade accounting stats */
>> >> +     val = nfnl_acct_update(skb, info->nfacct, mode);
>> >> +
>> >> +     /* no need to go further if we don't have a quota */
>> >> +     if (!(acct_quota->flags & XT_NFACCT_QUOTA))
>> >> +             return ret;
>> >> +
>> >> +     /* are we over the limit ? */
>> >> +     if (val <= info->quota) {
>> >
>> > You use <=, I think this should be <. Not further below you're using >=.
>>
>> That is the intended purpose - I take the limit to be inclusive.  If
>> you have a limit of 200 packets, you want the packet that makes the
>> count go from 199 to 200 to be allowed through.  At the same time you
>> want the the notification to be sent out right away since you don't
>> know how long it will take to receive the next packet.  Get back to me
>> on that if you don't agree with my logic.
>
> OK, makes sense.
>
>> >> +             /* reset flag in case userspace cleared the counters */
>> >> +             atomic_set(&acct_quota->notified, 0)
>> >
>> > This has runtime overhead and I think it's racy.
>>
>> Good catch on the racy part.
>>
>> > I think we can avoid
>> > this if the quota object management is moved to nfnetlink_acct (see
>> > below for further explanation). The idea is to iterate over the list
>> > of quota objects (to find notified flags that need to be reset) by
>> > when NFNL_MSG_ACCT_GET_CTRZERO is called.
>> >
> [...]
>> >> +static struct  nf_acct_quota *
>> >> +nfacct_create_quota(struct xt_nfacct_match_info_v1 *info)
>> >
>> > Thinking of nftables and the reset notified flag above, I think it's
>> > better if you move the quota object management to nfnetlink_acct.c by
>> > adding new nfacct commands. The idea is to maintain a quota object
>> > base that can be used by xt_nfacct. You have to add the following
>> > commands:
>> >
>> > enum nfnl_acct_msg_types {
>> >         ...
>> >         NFNL_MSG_ACCT_QUOTA_NEW,
>> >         NFNL_MSG_ACCT_QUOTA_GET,
>> >         NFNL_MSG_ACCT_QUOTA_DEL,
>> >         NFNL_MSG_ACCT_MAX
>> > };
>> >
>> > Then, extend nfnl_callback to include this new commands. The idea is
>> > to create/delete quota objects from the netlink interface, then attach
>> > them from the iptables nfacct extension.
>> >
>> > You won't need the mutex anymore btw, since it's already protected by
>> > nfnl_lock.
>>
>> Dealing with the overhead introduced by clearing the "notified" flag
>> is completely different from the management of "quota" objects.  To do
>> this the best solution is to introduce a callback to reset the flag
>> when a "nfacct" object gets reset.  That would be simple and in line
>> with what is used ubiquitously in the kernel.
>>
>> Moving the management of "quota" objects to nfnetlink_acct is a
>> completely different ball game.  Not only that, it would also mandate
>> to make changes to nfacct, libnfnl and libnl.  It would also require
>> to pull a few tricks to make all that extra code compile conditionally
>> when XT_QUOTA is not selected, unless we want to automatically select
>> XT_QUOTA when NETLINK_ACCT gets enabled.  And what happens when
>> another quota type-of-thing comes along?  We keep stuffing "nfacct"
>> and "nfnetlink_acct.c" with more functionality?
>
> This is the most generic thing to make it.
>
> How the user will inspect if quota has been already fulfilled? And how
> will the user reset it in case it needs it? Or simply upgrade the
> quota without reloading the rule?

Since quotas are intrinsically linked to nfacct objects, the best way
to know if a quota has been reached is to look at the statistics for
the corresponding nfacct object.  Even if quota status was available
via cmdline, users would still have to parse iptables rules to know
the nfacct object the quota belongs to, unless we add that information
in the output status of quota objects along with the mode.

> You have to provide a netlink
> interface if you want to implement this in a nice way.  Otherwise, I
> bet that someone will come with yet another hackish /proc interface
> for this, no please. Or worst, people will code perl scripts to parse
> the iptables -L -n output.

We could create a sysfs entry for each quota but it doesn't scale well.

>
> Of course, it's more code to nfnetlink_acct, the libraries and the
> utility, but this will be pretty generic and we'll provide a nice
> interface.
>
> And again, thinking of nftables, if we implement the quota management
> code in nfnetlink_acct, adding support for nfacct and quota will just
> require very little code since we'll reuse most of the common
> infrastructure.
>
> I really think the quota management code belongs nfnetlink_acct, not
> xt_nfacct.

I'm not against your idea but the thought of starting over after 1)
agreeing on a design and 2) investing so much time is disappointing.
If we end up choosing your way, would you prefer enhancing the nfacct
tool with quota capabilities or adding something like "nfquota"?
Personally I would prefer the latter as it scales better.

>
>> To me introducing a callback for each of the operation supported by
>> "nfnl_acct_cb" is the way to go.  It would allow any future extension
>> so take action whenever something happens to an "nfacct" object.
>>
>> I'll continue thinking about this but take my proposal into account
>> and get back to me.
>>
>> >
>> >> +{
>> >> +     struct nf_acct_quota *acct_quota;
>> >> +
>> >> +     mutex_lock(&nfacct_list_mutex);
>> >> +
>> >> +     acct_quota = kmalloc(sizeof(struct nf_acct_quota), GFP_ATOMIC);
>> >> +
>> >
>> > remove empty line above.
>> >
>> >> +     if (!acct_quota) {
>> >> +             mutex_unlock(&nfacct_list_mutex);
>> >> +             return NULL;
>> >> +     }
>> >> +
>> >> +     strncpy(acct_quota->name, info->name, NFACCT_NAME_MAX);
>> >> +     acct_quota->flags = info->flags;
>> >> +     acct_quota->quota = info->quota;
>> >> +     acct_quota->count = 1;
>> >> +     atomic_set(&acct_quota->notified, 0);
>> >> +     list_add_tail(&acct_quota->node, &nfacct_list);
>> >
>> > This needs to be converted to _rcu.
>>
>> You mean in the context that we move forward with your suggestion?
>
> Right.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index b2e85e5..bb04204 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -3,11 +3,24 @@ 
 
 #include <uapi/linux/netfilter/nfnetlink_acct.h>
 
+struct nf_acct {
+	atomic64_t		pkts;
+	atomic64_t		bytes;
+	struct list_head	head;
+	atomic_t		refcnt;
+	char			name[NFACCT_NAME_MAX];
+	struct rcu_head		rcu_head;
+};
 
-struct nf_acct;
+enum nfnl_acct_udt_type {
+	NFNL_ACCT_UDT_PACKETS,
+	NFNL_ACCT_UDT_BYTES,
+};
 
 struct nf_acct *nfnl_acct_find_get(const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
-void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+extern u64 nfnl_acct_update(const struct sk_buff *skb,
+			   struct nf_acct *nfacct, int mode);
+void nfnl_quota_event(struct nf_acct *cur);
 
 #endif /* _NFNL_ACCT_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 596ddd4..354a7e5 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -20,6 +20,8 @@  enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
 	NFNLGRP_NFTABLES,
 #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
+	NFNLGRP_ACCT_QUOTA,
+#define NFNLGRP_ACCT_QUOTA		NFNLGRP_ACCT_QUOTA
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index c7b6269..ae8ea0a 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -19,6 +19,7 @@  enum nfnl_acct_type {
 	NFACCT_PKTS,
 	NFACCT_BYTES,
 	NFACCT_USE,
+	NFACCT_QUOTA,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
index 3e19c8a..7c35ce0 100644
--- a/include/uapi/linux/netfilter/xt_nfacct.h
+++ b/include/uapi/linux/netfilter/xt_nfacct.h
@@ -3,11 +3,27 @@ 
 
 #include <linux/netfilter/nfnetlink_acct.h>
 
+enum xt_quota_flags {
+	XT_NFACCT_QUOTA_PKTS	= 1 << 0,
+	XT_NFACCT_QUOTA		= 1 << 1,
+};
+
 struct nf_acct;
+struct nf_acct_quota;
 
 struct xt_nfacct_match_info {
 	char		name[NFACCT_NAME_MAX];
-	struct nf_acct	*nfacct;
+	struct nf_acct	*nfacct __aligned(8);
 };
 
+struct xt_nfacct_match_info_v1 {
+	char		name[NFACCT_NAME_MAX];
+	struct nf_acct	*nfacct __aligned(8);
+
+	__u32 flags;
+	__aligned_u64 quota;
+
+	/* used internally by kernel */
+	struct nf_acct_quota *priv __aligned(8);
+};
 #endif /* _XT_NFACCT_MATCH_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 748f304..ce184951 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1108,7 +1108,8 @@  config NETFILTER_XT_MATCH_NFACCT
 	select NETFILTER_NETLINK_ACCT
 	help
 	  This option allows you to use the extended accounting through
-	  nfnetlink_acct.
+	  nfnetlink_acct.  It is also possible to add quotas at the
+	  packet and byte level.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d46..5c580a7 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -29,15 +29,6 @@  MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
 
 static LIST_HEAD(nfnl_acct_list);
 
-struct nf_acct {
-	atomic64_t		pkts;
-	atomic64_t		bytes;
-	struct list_head	head;
-	atomic_t		refcnt;
-	char			name[NFACCT_NAME_MAX];
-	struct rcu_head		rcu_head;
-};
-
 static int
 nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
@@ -92,7 +83,7 @@  nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	return 0;
 }
 
-static int
+int
 nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		   int event, struct nf_acct *acct)
 {
@@ -134,6 +125,7 @@  nla_put_failure:
 	nlmsg_cancel(skb, nlh);
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nfnl_acct_fill_info);
 
 static int
 nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -329,13 +321,41 @@  void nfnl_acct_put(struct nf_acct *acct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_put);
 
-void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
+u64 nfnl_acct_update(const struct sk_buff *skb,
+		    struct nf_acct *nfacct, int mode)
 {
-	atomic64_inc(&nfacct->pkts);
-	atomic64_add(skb->len, &nfacct->bytes);
+	u64 pkts, bytes;
+
+	pkts = atomic64_inc_return(&nfacct->pkts);
+	bytes = atomic64_add_return(skb->len, &nfacct->bytes);
+
+	return (mode == NFNL_ACCT_UDT_PACKETS) ? pkts : bytes;
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
+void
+nfnl_quota_event(struct nf_acct *cur)
+{
+	int ret;
+	struct sk_buff *skb;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	ret = nfnl_acct_fill_info(skb, 0, 0, NFACCT_QUOTA,
+				 NFNL_MSG_ACCT_NEW, cur);
+
+	if (ret <= 0) {
+		kfree_skb(skb);
+		return;
+	}
+
+	netlink_broadcast(init_net.nfnl, skb, 0,
+			 NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
+}
+EXPORT_SYMBOL_GPL(nfnl_quota_event);
+
 static int __init nfnl_acct_init(void)
 {
 	int ret;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef..d1bd5ea 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -9,8 +9,10 @@ 
 #include <linux/module.h>
 #include <linux/skbuff.h>
 
+#include <net/netlink.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/xt_nfacct.h>
 
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
@@ -19,15 +21,62 @@  MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_nfacct");
 MODULE_ALIAS("ip6t_nfacct");
 
+static LIST_HEAD(nfacct_list);
+static DEFINE_MUTEX(nfacct_list_mutex);
+
+struct nf_acct_quota {
+	char			name[NFACCT_NAME_MAX];
+	int			count;
+	u32			flags;
+	u64			quota;
+	struct list_head	node;
+	atomic_t		notified;
+};
+
 static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_nfacct_match_info *info = par->targinfo;
+	u64 __always_unused val;
 
-	nfnl_acct_update(skb, info->nfacct);
+	val = nfnl_acct_update(skb, info->nfacct, 0);
 
 	return true;
 }
 
+static bool
+nfacct_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	int mode;
+	u64 val;
+	const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
+	struct nf_acct_quota *acct_quota = info->priv;
+	bool ret = true;
+
+	mode = (acct_quota->flags & XT_NFACCT_QUOTA_PKTS) ?
+	       NFNL_ACCT_UDT_PACKETS : NFNL_ACCT_UDT_BYTES;
+
+	/* upgrade accounting stats */
+	val = nfnl_acct_update(skb, info->nfacct, mode);
+
+	/* no need to go further if we don't have a quota */
+	if (!(acct_quota->flags & XT_NFACCT_QUOTA))
+		return ret;
+
+	/* are we over the limit ? */
+	if (val <= info->quota) {
+		/* reset flag in case userspace cleared the counters */
+		atomic_set(&acct_quota->notified, 0);
+		ret = !ret;
+	}
+
+	if (val >= info->quota)
+		/* cmpxchg guarantees only one CPU can send a notification */
+		if (atomic_cmpxchg(&acct_quota->notified, 0, 1) == 0)
+			nfnl_quota_event(info->nfacct);
+
+	return ret;
+}
+
 static int
 nfacct_mt_checkentry(const struct xt_mtchk_param *par)
 {
@@ -44,32 +93,140 @@  nfacct_mt_checkentry(const struct xt_mtchk_param *par)
 	return 0;
 }
 
+static
+struct nf_acct_quota *nfacct_find_quota(char *name)
+{
+	struct nf_acct_quota *curr, *match = NULL;
+
+	mutex_lock(&nfacct_list_mutex);
+	list_for_each_entry(curr, &nfacct_list, node)
+		if (strncmp(curr->name, name, NFACCT_NAME_MAX) == 0) {
+			match = curr;
+			match->count++;
+			break;
+		}
+
+	mutex_unlock(&nfacct_list_mutex);
+
+	return match;
+}
+
+static struct  nf_acct_quota *
+nfacct_create_quota(struct xt_nfacct_match_info_v1 *info)
+{
+	struct nf_acct_quota *acct_quota;
+
+	mutex_lock(&nfacct_list_mutex);
+
+	acct_quota = kmalloc(sizeof(struct nf_acct_quota), GFP_ATOMIC);
+
+	if (!acct_quota) {
+		mutex_unlock(&nfacct_list_mutex);
+		return NULL;
+	}
+
+	strncpy(acct_quota->name, info->name, NFACCT_NAME_MAX);
+	acct_quota->flags = info->flags;
+	acct_quota->quota = info->quota;
+	acct_quota->count = 1;
+	atomic_set(&acct_quota->notified, 0);
+	list_add_tail(&acct_quota->node, &nfacct_list);
+
+	mutex_unlock(&nfacct_list_mutex);
+
+	return acct_quota;
+}
+
+static int
+nfacct_mt_checkentry_v1(const struct xt_mtchk_param *par)
+{
+	struct xt_nfacct_match_info_v1 *info = par->matchinfo;
+	struct nf_acct *nfacct;
+	struct nf_acct_quota *acct_quota = NULL;
+
+	nfacct = nfnl_acct_find_get(info->name);
+	if (nfacct == NULL) {
+		pr_info("xt_nfacct: invalid accounting object `%s'\n",
+		       info->name);
+		return -ENOENT;
+	}
+
+	info->nfacct = nfacct;
+
+	if (info->flags & XT_NFACCT_QUOTA) {
+		/* get quota object from list */
+		acct_quota = nfacct_find_quota(info->name);
+
+		/* object doesn't exist - create it */
+		if (acct_quota == NULL) {
+			acct_quota = nfacct_create_quota(info);
+			/* something went wrong - get out of here */
+			if (acct_quota == NULL) {
+				nfnl_acct_put(info->nfacct);
+				return -ENOMEM;
+			}
+		}
+	}
+
+	info->priv = acct_quota;
+
+	return 0;
+}
+
 static void
 nfacct_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	const struct xt_nfacct_match_info *info = par->matchinfo;
+	nfnl_acct_put(info->nfacct);
+}
+
+static void
+nfacct_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+	const struct xt_nfacct_match_info_v1 *info = par->matchinfo;
+	struct nf_acct_quota *acct_quota = info->priv;
 
 	nfnl_acct_put(info->nfacct);
+
+	mutex_lock(&nfacct_list_mutex);
+	if (--acct_quota->count == 0) {
+		list_del(&acct_quota->node);
+		kfree(acct_quota);
+	}
+	mutex_unlock(&nfacct_list_mutex);
 }
 
-static struct xt_match nfacct_mt_reg __read_mostly = {
-	.name       = "nfacct",
-	.family     = NFPROTO_UNSPEC,
-	.checkentry = nfacct_mt_checkentry,
-	.match      = nfacct_mt,
-	.destroy    = nfacct_mt_destroy,
-	.matchsize  = sizeof(struct xt_nfacct_match_info),
-	.me         = THIS_MODULE,
+static struct xt_match nfacct_mt_reg[] __read_mostly = {
+	{
+		.name       = "nfacct",
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = nfacct_mt_checkentry,
+		.match      = nfacct_mt,
+		.destroy    = nfacct_mt_destroy,
+		.matchsize  = sizeof(struct xt_nfacct_match_info),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "nfacct",
+		.revision   = 1,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = nfacct_mt_checkentry_v1,
+		.match      = nfacct_mt_v1,
+		.destroy    = nfacct_mt_destroy_v1,
+		.matchsize  = sizeof(struct xt_nfacct_match_info_v1),
+		.me         = THIS_MODULE,
+	},
+
 };
 
 static int __init nfacct_mt_init(void)
 {
-	return xt_register_match(&nfacct_mt_reg);
+	return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
 }
 
 static void __exit nfacct_mt_exit(void)
 {
-	xt_unregister_match(&nfacct_mt_reg);
+	xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
 }
 
 module_init(nfacct_mt_init);