Patchwork [RFC,v4] cgroup: net_cls: traffic counter based on classification control cgroup

login
register
mail settings
Submitter Alexey Perevalov
Date Jan. 18, 2013, 1:59 p.m.
Message ID <20130118135944.GA15062@aperevalov>
Download mbox | patch
Permalink /patch/213612/
State RFC
Delegated to: David Miller
Headers show

Comments

Alexey Perevalov - Jan. 18, 2013, 1:59 p.m.
Traffic counter based on classification control group

	The main goal of this patch is counting traffic for processes placed into
	the net_cls cgroup (ingress and egress).
	It is based on atomics and holds a counter per network interface.

	It handles packets in net/core/dev.c for egress and in
	/net/ipv4/tcp.c|udp.c for ingress.

	Cgroup fs interface provides following files additional to existing
		net_cls files:
			net_cls.ifacename.usage_in_bytes
			Containing rcv/snd lines.

Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
---
 Documentation/cgroups/net_cls.txt |   20 ++++
 include/net/cls_cgroup.h          |  200 +++++++++++++++++++++++++++++++++----
 include/net/cls_counter_holder.h  |   26 +++++
 init/Kconfig                      |   25 +++++
 kernel/cgroup.c                   |    2 +
 kernel/res_counter.c              |    4 +
 net/core/dev.c                    |    6 ++
 net/ipv4/tcp.c                    |   27 ++++-
 net/ipv4/udp.c                    |    6 ++
 net/sched/Kconfig                 |   11 --
 net/sched/Makefile                |    1 +
 net/sched/cls_cgroup.c            |  194 ++++++++++++++++++++++++++++++++++-
 net/sched/cls_counter_holder.c    |  144 ++++++++++++++++++++++++++
 13 files changed, 631 insertions(+), 35 deletions(-)
 create mode 100644 Documentation/cgroups/net_cls.txt
 create mode 100644 include/net/cls_counter_holder.h
 create mode 100644 net/sched/cls_counter_holder.c
Eric Dumazet - Jan. 18, 2013, 2:27 p.m.
On Fri, 2013-01-18 at 17:59 +0400, Alexey Perevalov wrote:
> Traffic counter based on classification control group
> 
> 	The main goal of this patch it's counting traffic for process placed to
> 	net_cls cgroup (ingress and egress).
> 	It's based on atomic and holds counter per network interfaces.
> 
> 	It handles packets in net/core/dev.c for egress and in
> 	/net/ipv4/tcp.c|udp.c for ingress.
> 
> 	Cgroup fs interface provides following files additional to existing
> 		net_cls files:
> 			net_cls.ifacename.usage_in_bytes
> 			Containing rcv/snd lines.
> 
> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
> ---

...

> index d1e8116..ffc9ec2 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -135,6 +135,7 @@
>  #include <linux/net_tstamp.h>
>  #include <linux/static_key.h>
>  #include <net/flow_keys.h>
> +#include <net/cls_cgroup.h>
>  
>  #include "net-sysfs.h"
>  
> @@ -2922,6 +2923,11 @@ int dev_queue_xmit(struct sk_buff *skb)
>  	 */
>  	rcu_read_lock_bh();
>  
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +	if (dev)

How dev could be NULL here ?

> +		count_cls_snd(skb_cls_classid(skb), skb->len, dev->name);
> +#endif

Why do you add these ugly ifdefs in the C files ?

As already said, you should not add new ifdef in C files, but properly
define empty functions in the header files

#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
 ...
#else
 static inline void count_cls_snd( args ) { }
#endif




> +
>  	skb_update_prio(skb);
>  
>  	txq = netdev_pick_tx(dev, skb);
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 1ca2536..dc4dc3a 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -276,6 +276,7 @@
>  #include <net/ip.h>
>  #include <net/netdma.h>
>  #include <net/sock.h>
> +#include <net/cls_cgroup.h>
>  
>  #include <asm/uaccess.h>
>  #include <asm/ioctls.h>
> @@ -1464,6 +1465,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
>  	u32 seq = tp->copied_seq;
>  	u32 offset;
>  	int copied = 0;
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +	int ifindex = 0;
> +#endif
>  
>  	if (sk->sk_state == TCP_LISTEN)
>  		return -ENOTCONN;
> @@ -1510,6 +1514,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
>  			++seq;
>  			break;
>  		}
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +		ifindex = get_ifindex_from_skb(skb);
> +#endif
>  		sk_eat_skb(sk, skb, false);
>  		if (!desc->count)
>  			break;
> @@ -1520,8 +1527,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
>  	tcp_rcv_space_adjust(sk);
>  
>  	/* Clean up data we have read: This will do ACK frames. */
> -	if (copied > 0)
> +	if (copied > 0) {
>  		tcp_cleanup_rbuf(sk, copied);
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +		count_cls_rcv(current, copied, ifindex);
> +#endif
> +	}
>  	return copied;
>  }
>  EXPORT_SYMBOL(tcp_read_sock);
> @@ -1549,6 +1560,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
>  	bool copied_early = false;
>  	struct sk_buff *skb;
>  	u32 urg_hole = 0;
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +	int ifindex = 0;
> +#endif
>  
>  	lock_sock(sk);
>  
> @@ -1873,6 +1887,9 @@ skip_copy:
>  		if (tcp_hdr(skb)->fin)
>  			goto found_fin_ok;
>  		if (!(flags & MSG_PEEK)) {
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +			ifindex = get_ifindex_from_skb(skb);
> +#endif
>  			sk_eat_skb(sk, skb, copied_early);
>  			copied_early = false;
>  		}
> @@ -1882,6 +1899,9 @@ skip_copy:
>  		/* Process the FIN. */
>  		++*seq;
>  		if (!(flags & MSG_PEEK)) {
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +			ifindex = get_ifindex_from_skb(skb);
> +#endif
>  			sk_eat_skb(sk, skb, copied_early);
>  			copied_early = false;
>  		}
> @@ -1924,6 +1944,11 @@ skip_copy:
>  	/* Clean up data we have read: This will do ACK frames. */
>  	tcp_cleanup_rbuf(sk, copied);
>  
> +#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
> +	if (copied > 0)
> +		count_cls_rcv(current, copied, ifindex);
> +#endif
> +
>  	release_sock(sk);
>  	return copied;
>  


There is no need to add so much ugly code in tcp at least

For input packets, as soon as we identify a matching socket in IP/TCP
early demux, we are done. It permits to properly account packets, even
the retransmits and ACKS, and even packets dropped by netfilter or
sk_filter.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroups/net_cls.txt
new file mode 100644
index 0000000..131fbf1
--- /dev/null
+++ b/Documentation/cgroups/net_cls.txt
@@ -0,0 +1,20 @@ 
+Network packet classification cgroup
+--------------------------------------
+
+The net_cls cgroup is intended to mark egress network packets with a special
+identifier (classid). The main purpose of marking packets with a classid is
+using it in the Linux QoS mechanism. It is needed to tweak TC rules for
+cgroup-related processes.
+
+Cgroup net_cls marks egress packets in sk_alloc.
+
+Also this cgroup provides the ability to count network traffic per network
+interface and exposes it in the corresponding files.
+
+The file system representation of the cgroup consists of the standard cgroupfs
+files like tasks, cgroup.procs, cgroup.clone_children, cgroup.event_control.
+
+The net_cls cgroup introduces its own net_cls.classid.
+The counting part contains a counter file with the following name:
+net_cls.iface_name.usage_in_bytes
+which consists of 2 lines with rcv and snd.
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 2581638..e3bfe6f 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -13,54 +13,197 @@ 
 #ifndef _NET_CLS_CGROUP_H
 #define _NET_CLS_CGROUP_H
 
+#include <linux/atomic.h>
 #include <linux/cgroup.h>
 #include <linux/hardirq.h>
 #include <linux/rcupdate.h>
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+#include <linux/nsproxy.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <net/cls_counter_holder.h>
+#include <net/sock.h>
+
+/*TODO hide all it to separate file*/
+
+struct cls_iface_cntrs {
+	char *dev_name;
+	atomic64_t snd_counter;
+	atomic64_t rcv_counter;
+	struct list_head link;
+};
+
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
+
 #if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
 struct cgroup_cls_state
 {
 	struct cgroup_subsys_state css;
 	u32 classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	struct cls_iface_cntrs iface_stats;
+#endif /*CONFIG_NET_CLS_COUNTER*/
 };
 
 extern void sock_update_classid(struct sock *sk, struct task_struct *task);
 
-#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
-static inline u32 task_cls_classid(struct task_struct *p)
+#if IS_MODULE(CONFIG_NET_CLS_CGROUP)
+static inline struct cgroup_cls_state *get_cls_cgroup(struct task_struct *p)
 {
-	u32 classid;
+	struct cgroup_subsys_state *css = task_subsys_state(p,
+		net_cls_subsys_id);
+	if (css)
+		return container_of(css,
+				       struct cgroup_cls_state, css);
+	return NULL;
+}
+#elif IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
+static inline struct cgroup_cls_state *get_cls_cgroup(struct task_struct *p)
+{
+	return container_of(task_subsys_state(p, net_cls_subsys_id),
+			       struct cgroup_cls_state, css);
+}
+#endif
 
-	if (in_interrupt())
-		return 0;
 
-	rcu_read_lock();
-	classid = container_of(task_subsys_state(p, net_cls_subsys_id),
-			       struct cgroup_cls_state, css)->classid;
-	rcu_read_unlock();
+#endif /*CONFIG_NET_CLS_CGROUP*/
 
-	return classid;
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline u32 skb_cls_classid(const struct sk_buff *skb)
+{
+	return (skb && skb->sk) ? skb->sk->sk_classid : 0;
+}
+
+static inline int get_ifindex_from_skb(const struct sk_buff *skb)
+{
+	int ifindex = 0;
+	if (skb)
+		ifindex = skb->skb_iif;
+	return ifindex;
+}
+
+static struct cls_iface_cntrs *find_cls_counter(
+	struct cgroup_cls_state *cls_cgroup,
+	const char *dev_name,
+	bool create)
+{
+	/*TODO Add lock*/
+	struct cls_iface_cntrs *entry = NULL;
+
+	if (!dev_name) {
+		pr_err("cls please provide valid dev name");
+		return NULL;
+	}
+
+	list_for_each_entry(entry, &cls_cgroup->iface_stats.link, link)
+		if (!strcmp(entry->dev_name, dev_name))
+			return entry;
+
+	if (!create)
+		return entry;
+
+	/*not found, insert*/
+	entry = kmalloc(sizeof(struct cls_iface_cntrs), GFP_ATOMIC);
+	entry->dev_name = kstrdup(dev_name, GFP_ATOMIC);
+	atomic64_set(&entry->rcv_counter, 0);
+	atomic64_set(&entry->snd_counter, 0);
+	list_add_tail(&entry->link, &cls_cgroup->iface_stats.link);
+	return entry;
 }
-#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
+
+static void charge_net_cls_snd(struct cgroup_cls_state *cls_cgroup,
+	const u32 copied, const char *dev_name)
+{
+	struct cls_iface_cntrs *cnt = find_cls_counter(cls_cgroup,
+		dev_name, true);
+
+	if (!cnt)
+		return;
+
+	atomic64_add(copied, &cnt->snd_counter);
+}
+
+static char *get_dev_name(const int ifindex)
+{
+	struct net *net = NULL;
+	struct nsproxy *nsproxy = NULL;
+	struct net_device *net_dev = NULL;
+
+	nsproxy = task_nsproxy(current);
+	if (!nsproxy) {
+		pr_debug("cls cant find task_nsproxy");
+		return NULL;
+	}
+
+	net = get_net(nsproxy->net_ns);
+	if (!net) {
+		pr_debug("cls cant find net");
+		return NULL;
+	}
+	net_dev = dev_get_by_index(net, ifindex);
+
+	return net_dev ? net_dev->name : NULL;
+}
+
+static void charge_net_cls_rcv(struct cgroup_cls_state *cls_cgroup,
+	const u32 copied, const int ifindex)
+{
+	char *dev_name = get_dev_name(ifindex);
+	struct cls_iface_cntrs *cnt = find_cls_counter(cls_cgroup,
+		dev_name, true);
+
+	if (!cnt)
+		return;
+
+	atomic64_add(copied, &cnt->rcv_counter);
+}
+
+static inline void count_cls_rcv(struct task_struct *p, const u32 copied,
+	const int ifindex)
+{
+	struct cgroup_cls_state *cls_cgroup;
+
+	cls_cgroup = get_cls_cgroup(p);
+
+	if (cls_cgroup)
+		charge_net_cls_rcv(cls_cgroup, copied, ifindex);
+}
+
+static inline void count_cls_snd(u32 classid, const u32 copied,
+	const char *dev_name)
+{
+	struct cgroup_cls_state *cls_cgroup;
+
+	cls_cgroup = find_cls_cgroup_by_classid(classid);
+
+	if (cls_cgroup)
+		charge_net_cls_snd(cls_cgroup, copied, dev_name);
+}
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
 static inline u32 task_cls_classid(struct task_struct *p)
 {
-	struct cgroup_subsys_state *css;
-	u32 classid = 0;
+	int classid = 0;
+	struct cgroup_cls_state *cls_cgroup = NULL;
 
 	if (in_interrupt())
 		return 0;
 
 	rcu_read_lock();
-	css = task_subsys_state(p, net_cls_subsys_id);
-	if (css)
-		classid = container_of(css,
-				       struct cgroup_cls_state, css)->classid;
+
+	cls_cgroup = get_cls_cgroup(p);
+	if (cls_cgroup)
+		classid = cls_cgroup->classid;
+
 	rcu_read_unlock();
 
 	return classid;
 }
-#endif
-#else /* !CGROUP_NET_CLS_CGROUP */
+
+#else /* !CONFIG_NET_CLS_CGROUP */
 static inline void sock_update_classid(struct sock *sk, struct task_struct *task)
 {
 }
@@ -69,5 +212,22 @@  static inline u32 task_cls_classid(struct task_struct *p)
 {
 	return 0;
 }
-#endif /* CGROUP_NET_CLS_CGROUP */
+#endif /* CONFIG_NET_CLS_CGROUP */
+
+#if !IS_ENABLED(CONFIG_NET_CLS_CGROUP) || !IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline void count_cls_rcv(struct task_struct *p, const u32 copied,
+				 const int ifindex)
+{
+}
+
+static inline void count_cls_snd(u32 classid, const u32 copied,
+				 const char *dev_name)
+{
+}
+
+static inline u32 skb_cls_classid(const struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
 #endif  /* _NET_CLS_CGROUP_H */
diff --git a/include/net/cls_counter_holder.h b/include/net/cls_counter_holder.h
new file mode 100644
index 0000000..a129baa
--- /dev/null
+++ b/include/net/cls_counter_holder.h
@@ -0,0 +1,26 @@ 
+/*
+ * cls_counter_holder.h  Interface for holding references of the
+ *                       net cls cgroup instances.
+ *
+ * Authors:	Alexey Perevalov, <a.perevalov@samsung.com>
+ *
+ * Changes:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_CLS_COUNTER_HOLDER_H_
+#define _NET_CLS_COUNTER_HOLDER_H_
+
+#include <net/cls_cgroup.h>
+
+struct cgroup_cls_state;
+
+void insert_cls_cgroup_entry(struct cgroup_cls_state *obj);
+void delete_cls_cgroup_entry(const u32 classid);
+struct cgroup_cls_state *find_cls_cgroup_by_classid(const u32 classid);
+
+
+#endif /* _NET_CLS_COUNTER_HOLDER_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index 7d30240..6e01fc2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -906,6 +906,31 @@  config CGROUP_HUGETLB
 	  control group is tracked in the third page lru pointer. This means
 	  that we cannot use the controller with huge page less than 3 pages.
 
+menuconfig NET_CLS_CGROUP
+	tristate "Control Group Classifier"
+	select NET_CLS
+	depends on CGROUPS
+	---help---
+	  Say Y here if you want to classify packets based on the control
+	  cgroup of their process.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
+if NET_CLS_CGROUP
+config NET_CLS_COUNTER
+	bool "Network traffic counter for network Control Group Classifier"
+	select NET_CLS
+	default n
+	depends on NET_CLS_CGROUP && RESOURCE_COUNTERS
+	---help---
+	  Say Y here if you want to count traffic associated with the control
+	  cgroup.
+
+	  To add this functionality to cls_cgroup, select y.
+
+endif #NET_CLS_CGROUP
+
 config CGROUP_PERF
 	bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
 	depends on PERF_EVENTS && CGROUPS
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892..cd82a9e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2897,6 +2897,8 @@  int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 	cgroup_cfts_commit(ss, NULL, false);
 	return -ENOENT;
 }
+EXPORT_SYMBOL_GPL(cgroup_rm_cftypes);
+
 
 /**
  * cgroup_task_count - count the number of tasks in a cgroup.
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index ff55247..a51b501 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,8 @@ 
 #include <linux/res_counter.h>
 #include <linux/uaccess.h>
 #include <linux/mm.h>
+#include <linux/export.h>
+
 
 void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
@@ -21,6 +23,7 @@  void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 	counter->soft_limit = RESOURCE_MAX;
 	counter->parent = parent;
 }
+EXPORT_SYMBOL(res_counter_init);
 
 int res_counter_charge_locked(struct res_counter *counter, unsigned long val,
 			      bool force)
@@ -176,6 +179,7 @@  u64 res_counter_read_u64(struct res_counter *counter, int member)
 	return *res_counter_member(counter, member);
 }
 #endif
+EXPORT_SYMBOL(res_counter_read_u64);
 
 int res_counter_memparse_write_strategy(const char *buf,
 					unsigned long long *res)
diff --git a/net/core/dev.c b/net/core/dev.c
index d1e8116..ffc9ec2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@ 
 #include <linux/net_tstamp.h>
 #include <linux/static_key.h>
 #include <net/flow_keys.h>
+#include <net/cls_cgroup.h>
 
 #include "net-sysfs.h"
 
@@ -2922,6 +2923,11 @@  int dev_queue_xmit(struct sk_buff *skb)
 	 */
 	rcu_read_lock_bh();
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	if (dev)
+		count_cls_snd(skb_cls_classid(skb), skb->len, dev->name);
+#endif
+
 	skb_update_prio(skb);
 
 	txq = netdev_pick_tx(dev, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ca2536..dc4dc3a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -276,6 +276,7 @@ 
 #include <net/ip.h>
 #include <net/netdma.h>
 #include <net/sock.h>
+#include <net/cls_cgroup.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -1464,6 +1465,9 @@  int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 	u32 seq = tp->copied_seq;
 	u32 offset;
 	int copied = 0;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	int ifindex = 0;
+#endif
 
 	if (sk->sk_state == TCP_LISTEN)
 		return -ENOTCONN;
@@ -1510,6 +1514,9 @@  int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			++seq;
 			break;
 		}
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+		ifindex = get_ifindex_from_skb(skb);
+#endif
 		sk_eat_skb(sk, skb, false);
 		if (!desc->count)
 			break;
@@ -1520,8 +1527,12 @@  int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 	tcp_rcv_space_adjust(sk);
 
 	/* Clean up data we have read: This will do ACK frames. */
-	if (copied > 0)
+	if (copied > 0) {
 		tcp_cleanup_rbuf(sk, copied);
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+		count_cls_rcv(current, copied, ifindex);
+#endif
+	}
 	return copied;
 }
 EXPORT_SYMBOL(tcp_read_sock);
@@ -1549,6 +1560,9 @@  int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	bool copied_early = false;
 	struct sk_buff *skb;
 	u32 urg_hole = 0;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	int ifindex = 0;
+#endif
 
 	lock_sock(sk);
 
@@ -1873,6 +1887,9 @@  skip_copy:
 		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK)) {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+			ifindex = get_ifindex_from_skb(skb);
+#endif
 			sk_eat_skb(sk, skb, copied_early);
 			copied_early = false;
 		}
@@ -1882,6 +1899,9 @@  skip_copy:
 		/* Process the FIN. */
 		++*seq;
 		if (!(flags & MSG_PEEK)) {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+			ifindex = get_ifindex_from_skb(skb);
+#endif
 			sk_eat_skb(sk, skb, copied_early);
 			copied_early = false;
 		}
@@ -1924,6 +1944,11 @@  skip_copy:
 	/* Clean up data we have read: This will do ACK frames. */
 	tcp_cleanup_rbuf(sk, copied);
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	if (copied > 0)
+		count_cls_rcv(current, copied, ifindex);
+#endif
+
 	release_sock(sk);
 	return copied;
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 79c8dbe..a143629 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -101,6 +101,7 @@ 
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <net/cls_cgroup.h>
 #include <net/net_namespace.h>
 #include <net/icmp.h>
 #include <net/route.h>
@@ -1254,6 +1255,11 @@  try_again:
 	if (flags & MSG_TRUNC)
 		err = ulen;
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	if (ulen > 0)
+		count_cls_rcv(current, ulen, get_ifindex_from_skb(skb));
+#endif
+
 out_free:
 	skb_free_datagram_locked(sk, skb);
 out:
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 235e01a..ac7bcdb 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -418,17 +418,6 @@  config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.
 
-config NET_CLS_CGROUP
-	tristate "Control Group Classifier"
-	select NET_CLS
-	depends on CGROUPS
-	---help---
-	  Say Y here if you want to classify packets based on the control
-	  cgroup of their process.
-
-	  To compile this code as a module, choose M here: the
-	  module will be called cls_cgroup.
-
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf0..95dbb12 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -49,6 +49,7 @@  obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
+obj-$(CONFIG_NET_CLS_COUNTER)   += cls_counter_holder.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3a294eb..1535a97 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -9,6 +9,7 @@ 
  * Authors:	Thomas Graf <tgraf@suug.ch>
  */
 
+#include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -22,6 +23,15 @@ 
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 #include <net/cls_cgroup.h>
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+#include <linux/rbtree.h>
+#include <net/cls_counter_holder.h>
+
+static struct notifier_block counter_notifier;
+static const char *rcv_label = "rcv:";
+static const char *snd_label = "snd:";
+
+#endif
 
 static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
 {
@@ -42,9 +52,38 @@  static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
 		return ERR_PTR(-ENOMEM);
+
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	atomic64_set(&cs->iface_stats.snd_counter, 0);
+	atomic64_set(&cs->iface_stats.rcv_counter, 0);
+	cs->iface_stats.dev_name = 0;
+	INIT_LIST_HEAD(&cs->iface_stats.link);
+#endif
+
 	return &cs->css;
 }
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline void cgrp_counter_destroy(struct cgroup_cls_state *cs)
+{
+	struct list_head *pos, *q;
+	delete_cls_cgroup_entry(cs->classid);
+
+	list_for_each_safe(pos, q, &cs->iface_stats.link) {
+		struct cls_iface_cntrs *tmp = list_entry(
+			pos, struct cls_iface_cntrs, link);
+		list_del(pos);
+		if (!tmp)
+			continue;
+
+		if (!tmp->dev_name)
+			kfree(tmp->dev_name);
+		kfree(tmp);
+	}
+
+}
+#endif
+
 static int cgrp_css_online(struct cgroup *cgrp)
 {
 	if (cgrp->parent)
@@ -55,6 +94,14 @@  static int cgrp_css_online(struct cgroup *cgrp)
 
 static void cgrp_css_free(struct cgroup *cgrp)
 {
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+
+	struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+
+	if (!cs)
+		return;
+	cgrp_counter_destroy(cs);
+#endif
 	kfree(cgrp_cls_state(cgrp));
 }
 
@@ -85,9 +132,57 @@  static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
 	return cgrp_cls_state(cgrp)->classid;
 }
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static const char *extract_dev_name(const char *cgroup_file_name)
+{
+	const char *dot = strchr(cgroup_file_name, '.');
+	const size_t len = dot ?
+			dot - cgroup_file_name : strlen(cgroup_file_name);
+
+	return kstrndup(cgroup_file_name, len, GFP_KERNEL);
+}
+
+static int read_stat(struct cgroup *cgrp, struct cftype *cft,
+		struct cgroup_map_cb *cb)
+{
+	struct cgroup_cls_state *cs = cgrp_cls_state(cgrp);
+	const char *dev_name = extract_dev_name(cft->name);
+	struct cls_iface_cntrs *res = find_cls_counter(cs, dev_name, false);
+
+	if (!res) {
+		pr_debug("cls cant read for cls");
+		return -EINVAL;
+	}
+
+	cb->fill(cb, rcv_label,
+		atomic64_read(&res->rcv_counter));
+	cb->fill(cb, snd_label,
+		atomic64_read(&res->snd_counter));
+
+	kfree(dev_name);
+	return 0;
+}
+#endif /*CONFIG_NET_CLS_COUNTER*/
+
 static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
 {
-	cgrp_cls_state(cgrp)->classid = (u32) value;
+	struct cgroup_cls_state *cgrp_cls = cgrp_cls_state(cgrp);
+	u32 *classid = &cgrp_cls->classid;
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	u32 oldclassid = *classid;
+
+	if (find_cls_cgroup_by_classid(value)) {
+		pr_err("cls: classid %llu already exists\n", value);
+		return -EINVAL;
+	}
+
+	insert_cls_cgroup_entry(cgrp_cls);
+
+	if (oldclassid)
+		delete_cls_cgroup_entry(oldclassid);
+#endif /*CONFIG_NET_CLS_COUNTER*/
+	*classid = (u32) value;
+
 	return 0;
 }
 
@@ -304,17 +399,107 @@  static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
 	.owner		=	THIS_MODULE,
 };
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+static inline int add_cft_file_for_device(struct net_device *dev)
+{
+	struct cftype *cft;
+	int ret = 0;
+
+	if (!dev)
+		return ret;
+
+	cft = kmalloc(sizeof(struct cftype) * 2,
+		GFP_KERNEL);
+	/* *2 and last 0 fill for terminator */
+	memset(cft, 0, sizeof(struct cftype) * 2);
+
+	snprintf(cft->name, MAX_CFTYPE_NAME,
+		"%s.usage_in_bytes", dev->name);
+	cft->read_map = read_stat;
+	cft->private = RES_USAGE;
+	ret = cgroup_add_cftypes(&net_cls_subsys, cft);
+	if (ret)
+		pr_err("cls error adding cft for counting at cls_cgroup %d\n",
+		       ret);
+	return ret;
+}
+
+static int device_state_cb(struct notifier_block *nb,
+	unsigned long state, void *arg)
+{
+	struct net_device *net = (struct net_device *)arg;
+	if (!nb || !net) {
+		pr_err("Not valid arguments for net_device notifier cb\n");
+		return 0;
+	}
+
+	if (state == NETDEV_REGISTER) {
+		pr_info("cls New device %s\n", net->name);
+		return add_cft_file_for_device(net);
+	}
+	return 0;
+}
+
+static inline int init_cgroup_counter(void)
+{
+	int ret = 0;
+	struct net_device *dev;
+	counter_notifier.notifier_call = device_state_cb;
+
+	ret = register_netdevice_notifier(&counter_notifier);
+	if (ret)
+		pr_err("cls Cant register nofier\n");
+
+	for_each_netdev(&init_net, dev) {
+		ret = add_cft_file_for_device(dev);
+		if (ret)
+			goto unregister_notifier;
+	}
+
+	return ret;
+unregister_notifier:
+
+	unregister_netdevice_notifier(&counter_notifier);
+	return ret;
+}
+
+static void release_cft(void)
+{
+	struct list_head *pos, *q;
+	list_for_each_safe(pos, q, &net_cls_subsys.cftsets) {
+		struct cftype_set *set =
+				list_entry(pos, struct cftype_set, node);
+		int ret = cgroup_rm_cftypes(&net_cls_subsys, set->cfts);
+		if (!ret) {
+			pr_err("cls cant remove cftypes\n");
+			break;
+		}
+
+		kfree(set->cfts);
+	}
+}
+#endif
+
 static int __init init_cgroup_cls(void)
 {
 	int ret;
-
 	ret = cgroup_load_subsys(&net_cls_subsys);
 	if (ret)
 		goto out;
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	ret = init_cgroup_counter();
+	if (ret)
+		goto unload;
+#endif
+
 	ret = register_tcf_proto_ops(&cls_cgroup_ops);
 	if (ret)
-		cgroup_unload_subsys(&net_cls_subsys);
+		goto unload;
+
+	return 0;
+unload:
+	cgroup_unload_subsys(&net_cls_subsys);
 
 out:
 	return ret;
@@ -324,6 +509,9 @@  static void __exit exit_cgroup_cls(void)
 {
 	unregister_tcf_proto_ops(&cls_cgroup_ops);
 
+#if IS_ENABLED(CONFIG_NET_CLS_COUNTER)
+	release_cft();
+#endif
 	cgroup_unload_subsys(&net_cls_subsys);
 }
 
diff --git a/net/sched/cls_counter_holder.c b/net/sched/cls_counter_holder.c
new file mode 100644
index 0000000..94ab285
--- /dev/null
+++ b/net/sched/cls_counter_holder.c
@@ -0,0 +1,144 @@ 
+/*
+ * net/sched/cls_counter_holder.c Interface for holding references of the
+ *                     net cls cgroup instances.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Perevalov <a.perevalov@samsung.com>
+ */
+
+
+#include <linux/export.h>
+#include <linux/module.h>
+#include <net/cls_cgroup.h>
+#include <net/cls_counter_holder.h>
+
+static struct rb_root classid_tree = RB_ROOT;
+static DEFINE_SPINLOCK(classid_tree_lock);
+
+struct entry {
+	struct cgroup_cls_state *data;
+	struct rb_node node;
+};
+
+static struct entry *find_entry(struct rb_root *root, const u32 classid)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct entry *cls_entry = rb_entry(node, struct entry, node);
+		int result = 0;
+		if (!cls_entry || !cls_entry->data)
+			break;
+		result = cls_entry->data->classid - classid;
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return cls_entry;
+	}
+	return NULL;
+}
+
+void insert_cls_cgroup_entry(struct cgroup_cls_state *obj)
+{
+	struct rb_node **new;
+	struct rb_node *parent = NULL;
+	struct entry *new_entry;
+	unsigned long irq_flags = 0;
+
+	struct rb_root *root = &classid_tree;
+
+	spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+	new = &root->rb_node;
+
+	while (*new) {
+		struct entry *this = rb_entry(*new, struct entry, node);
+		/* Sort by classid, then by ifindex */
+		int result =
+			(this->data->classid - obj->classid);
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			goto unlock;
+	}
+
+	/* If we here, we need to insert new entry into tree */
+	new_entry = kmalloc(sizeof(struct entry), GFP_ATOMIC);
+	if (!new_entry)
+		goto unlock;
+
+	new_entry->data = obj;
+	/* Add new node and rebalance tree */
+	rb_link_node(&new_entry->node, parent, new);
+	rb_insert_color(&new_entry->node, root);
+
+unlock:
+	spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+EXPORT_SYMBOL(insert_cls_cgroup_entry);
+
+void delete_cls_cgroup_entry(const u32 classid)
+{
+	unsigned long irq_flags = 0;
+	struct entry *data = NULL;
+	struct rb_root *root = &classid_tree;
+	spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+	data = find_entry(root, classid);
+
+	if (data) {
+		rb_erase(&data->node, root);
+		kfree(data);
+	}
+	spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+EXPORT_SYMBOL(delete_cls_cgroup_entry);
+
+static void free_node(struct rb_node *root)
+{
+	struct entry *cur_entry = rb_entry(root, struct entry, node);
+	if (root->rb_left)
+		free_node(root->rb_left);
+	if (root->rb_right)
+		free_node(root->rb_right);
+	kfree(cur_entry);
+}
+
+static void free_classid_tree(void)
+{
+	unsigned long irq_flags = 0;
+
+	spin_lock_irqsave(&classid_tree_lock, irq_flags);
+
+	free_node(classid_tree.rb_node);
+
+	spin_unlock_irqrestore(&classid_tree_lock, irq_flags);
+}
+
+struct cgroup_cls_state *find_cls_cgroup_by_classid(const u32 classid)
+{
+	struct entry *cls_entry = find_entry(&classid_tree, classid);
+	if (cls_entry)
+		return cls_entry->data;
+
+	return NULL;
+}
+EXPORT_SYMBOL(find_cls_cgroup_by_classid);
+
+static void __exit exit_cls_counter_holder(void)
+{
+	free_classid_tree();
+}
+
+module_exit(exit_cls_counter_holder);
+MODULE_LICENSE("GPL");