Patchwork [v2,2/5] ipset: add "inner" flag implementation

login
register
mail settings
Submitter Mr Dash Four
Date June 16, 2013, 11:27 p.m.
Message ID <51BE49D4.1010901@googlemail.com>
Download mbox | patch
Permalink /patch/251747/
State Changes Requested
Delegated to: Jozsef Kadlecsik
Headers show

Comments

Mr Dash Four - June 16, 2013, 11:27 p.m.
This patch implements "inner" flag option in set iptables match,
allowing matching based on the properties (source/destination IP address,
protocol, port and so on) of the original (inner) connection in the event
of the following ICMP[v4,v6] messages:

ICMPv4 destination-unreachable (code 3);
ICMPv4 source-quench (code 4);
ICMPv4 time-exceeded (code 11);
ICMPv6 destination-unreachable (code 1);
ICMPv6 packet-too-big (code 2);
ICMPv6 time-exceeded (code 3);

Revision history:

v1 * initial revision
v2 * redundant code removed;
    * added a new header file (ip_set_icmp.h) with 2 inline functions,
      allowing access to the internal icmp header properties;
    * removed ip[46]inneraddr[ptr]functions as they are no longer needed
    * added new ipv[46]addr[ptr] and ip_set_get*port functions, the old
      functions are still preserved for backwards compatibility

Signed-off-by: Dash Four <mr.dash.four@googlemail.com>
---
  kernel/include/linux/netfilter/ipset/ip_set.h      |   66 ++++++++++++++
  .../include/linux/netfilter/ipset/ip_set_getport.h |   16 ++++
  kernel/include/linux/netfilter/ipset/ip_set_icmp.h |   91 ++++++++++++++++++++
  kernel/include/uapi/linux/netfilter/ipset/ip_set.h |    2 +
  kernel/net/netfilter/ipset/ip_set_getport.c        |   78 +++++++++++++----
  5 files changed, 238 insertions(+), 15 deletions(-)
  create mode 100644 kernel/include/linux/netfilter/ipset/ip_set_icmp.h


--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik - June 26, 2013, 8:27 p.m.
On Mon, 17 Jun 2013, Dash Four wrote:

> This patch implements "inner" flag option in set iptables match,
> allowing matching based on the properties (source/destination IP address,
> protocol, port and so on) of the original (inner) connection in the event
> of the following ICMP[v4,v6] messages:
> 
> ICMPv4 destination-unreachable (code 3);
> ICMPv4 source-quench (code 4);
> ICMPv4 time-exceeded (code 11);
> ICMPv6 destination-unreachable (code 1);
> ICMPv6 packet-too-big (code 2);
> ICMPv6 time-exceeded (code 3);
> 
> Revision history:

The patchset is a good step forward.

I have no objections against the other patches, however I have to comment 
this one.
 
> v1 * initial revision
> v2 * redundant code removed;
>    * added a new header file (ip_set_icmp.h) with 2 inline functions,
>      allowing access to the internal icmp header properties;
>    * removed ip[46]inneraddr[ptr]functions as they are no longer needed
>    * added new ipv[46]addr[ptr] and ip_set_get*port functions, the old
>      functions are still preserved for backwards compatibility

Get rid of the compatibility functions. Or keep the original function 
names with the extended arguments: there's no stable API.
 
> Signed-off-by: Dash Four <mr.dash.four@googlemail.com>
> ---
>  kernel/include/linux/netfilter/ipset/ip_set.h      |   66 ++++++++++++++
>  .../include/linux/netfilter/ipset/ip_set_getport.h |   16 ++++
>  kernel/include/linux/netfilter/ipset/ip_set_icmp.h |   91
> ++++++++++++++++++++
>  kernel/include/uapi/linux/netfilter/ipset/ip_set.h |    2 +
>  kernel/net/netfilter/ipset/ip_set_getport.c        |   78 +++++++++++++----
>  5 files changed, 238 insertions(+), 15 deletions(-)
>  create mode 100644 kernel/include/linux/netfilter/ipset/ip_set_icmp.h
> 
> diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h
> b/kernel/include/linux/netfilter/ipset/ip_set.h
> index 8499e25..5b6ef72 100644
> --- a/kernel/include/linux/netfilter/ipset/ip_set.h
> +++ b/kernel/include/linux/netfilter/ipset/ip_set.h
> @@ -17,9 +17,13 @@
>  #include <linux/netfilter/x_tables.h>
>  #include <linux/stringify.h>
>  #include <linux/vmalloc.h>
> +#include <net/ip.h>
> +#include <net/ipv6.h>
> +#include <net/icmp.h>
>  #include <net/netlink.h>
>  #include <uapi/linux/netfilter/ipset/ip_set.h>
>  #include <linux/netfilter/ipset/ip_set_compat.h>
> +#include <linux/netfilter/ipset/ip_set_icmp.h>

I don't really like the new header file. ip[v]4|6addrptr are generic 
functions (from ipset point of view) and getting the inner header is too, 
so those deserve to be in the ipset core.

>  #define _IP_SET_MODULE_DESC(a, b, c)		\
>  	MODULE_DESCRIPTION(a " type of IP sets, revisions " b "-" c)
> @@ -361,6 +365,25 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb,
> int type,
>  }
> 
>  /* Get address from skbuff */
> +static inline bool
> +ipv4addrptr(const struct sk_buff *skb, bool inner, bool src, __be32 *addr)
> +{
> +	struct iphdr *ih = ip_hdr(skb);
> +	unsigned int protooff = ip_hdrlen(skb);
> +
> +	if (ih == NULL ||

Why do you have to check the IP header?

> +	    (inner &&
> +	     (ih->protocol != IPPROTO_ICMP ||
> +	      !ip_set_get_icmpv4_inner_hdr(skb, &protooff, &ih))))
> +			goto err;

Move the protocol checking into ip_set_get_icmpv4_inner_hdr. Also, I 
suggested the name ip_set_get[_ip4]_inner_hdr because I want to extend it 
to support IPIP and GRE too. By moving the protocol checking into the 
function and using a not so specific name, those make possible the 
extension straightforward. Also, there's no need for the goto here: 
there's no other error path.

> +	*addr = src ? ih->saddr : ih->daddr;
> +	return true;
> +
> +err:
> +	return false;
> +}
> +
>  static inline __be32
>  ip4addr(const struct sk_buff *skb, bool src)
>  {
> @@ -373,12 +396,55 @@ ip4addrptr(const struct sk_buff *skb, bool src, __be32
> *addr)
>  	*addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
>  }
> 
> +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
> +static inline bool
> +ipv6addrptr(const struct sk_buff *skb, bool inner, bool src,
> +	    struct in6_addr *addr)
> +{
> +	struct ipv6hdr *ih = ipv6_hdr(skb);
> +
> +	if (ih == NULL)
> +		goto err;

The same comment as above.

> +	if (inner) {
> +		unsigned int protooff;
> +		u8 nexthdr = ih->nexthdr;
> +		__be16 frag_off;
> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
> +		protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
> +					    &nexthdr, &frag_off);
> +#else
> +		protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
> +					    &nexthdr);
> +#endif
> +		if (protooff < 0 || nexthdr != IPPROTO_ICMPV6 ||
> +		    !ip_set_get_icmpv6_inner_hdr(skb, &protooff, &ih))
> +			goto err;
> +	}

Move the whole code in the braces into ip_set_get_ipv6_inner_hdr, for the 
same reason as above.

> +	memcpy(addr, src ? &ih->saddr : &ih->daddr, sizeof(*addr));
> +	return true;
> +
> +err:
> +	return false;
> +}
> +
>  static inline void
>  ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
>  {
>  	memcpy(addr, src ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr,
>  	       sizeof(*addr));
>  }
> +#else
> +static inline bool
> +ipv6addrptr(const struct sk_buff *skb, bool inner, bool src,
> +	    struct in6_addr *addr)
> +{
> +	return false;
> +}
> +
> +static inline void
> +ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) { ; }
> +#endif
> 
>  /* Calculate the bytes required to store the inclusive range of a-b */
>  static inline int
> diff --git a/kernel/include/linux/netfilter/ipset/ip_set_getport.h
> b/kernel/include/linux/netfilter/ipset/ip_set_getport.h
> index 90d0930..8cdc177 100644
> --- a/kernel/include/linux/netfilter/ipset/ip_set_getport.h
> +++ b/kernel/include/linux/netfilter/ipset/ip_set_getport.h
> @@ -1,13 +1,26 @@
>  #ifndef _IP_SET_GETPORT_H
>  #define _IP_SET_GETPORT_H
> 
> +extern bool ip_set_get_ipv4_port(const struct sk_buff *skb, bool inner,
> +				 bool src, __be16 *port, u8 *proto);
> +
>  extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
>  				__be16 *port, u8 *proto);
> 
>  #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
> +extern bool ip_set_get_ipv6_port(const struct sk_buff *skb, bool inner,
> +				 bool src, __be16 *port, u8 *proto);
> +
>  extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
>  				__be16 *port, u8 *proto);
>  #else
> +static inline bool ip_set_get_ipv6_port(const struct sk_buff *skb,
> +					bool inner, bool src,
> +					__be16 *port, u8 *proto)
> +{
> +	return false;
> +}
> +
>  static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
>  				       __be16 *port, u8 *proto)
>  {
> @@ -15,6 +28,9 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff
> *skb, bool src,
>  }
>  #endif
> 
> +extern bool ip_set_get_ipv_port(const struct sk_buff *skb, u8 pf, bool inner,
> +				bool src, __be16 *port);
> +
>  extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
>  				__be16 *port);
> 
> diff --git a/kernel/include/linux/netfilter/ipset/ip_set_icmp.h
> b/kernel/include/linux/netfilter/ipset/ip_set_icmp.h
> new file mode 100644
> index 0000000..e116d28
> --- /dev/null
> +++ b/kernel/include/linux/netfilter/ipset/ip_set_icmp.h
> @@ -0,0 +1,91 @@
> +#ifndef _IP_SET_ICMP_H
> +#define _IP_SET_ICMP_H
> +
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <linux/icmp.h>
> +
> +static inline
> +bool ip_set_get_icmpv4_inner_hdr(const struct sk_buff *skb,
> +				 unsigned int *offset,
> +				 struct iphdr **ih)
> +{
> +	u8 type;
> +	struct iphdr _iph;
> +	struct icmphdr _icmph;
> +	struct iphdr *iph;
> +	const struct icmphdr *ich;
> +	/* RFC 1122: 3.2.2: req'd len: IP header + 8 bytes of inner header */
> +	static const size_t req_len = sizeof(struct iphdr) + 8;
> +
> +	if (offset == NULL || ih == NULL)
> +		goto err;
> +
> +	ich = skb_header_pointer(skb, *offset, sizeof(_icmph), &_icmph);
> +	if (ich == NULL ||
> +	    (ich->type <= NR_ICMP_TYPES && skb->len - *offset < req_len))
> +		goto err;
> +
> +	type = ich->type;
> +	if (type == ICMP_DEST_UNREACH ||
> +	    type == ICMP_SOURCE_QUENCH ||
> +	    type == ICMP_TIME_EXCEEDED) {
> +		*offset += sizeof(_icmph);
> +		iph = skb_header_pointer(skb, *offset, sizeof(_iph), &_iph);
> +		if (iph == NULL || ntohs(iph->frag_off) & IP_OFFSET)
> +			goto err;
> +
> +		*ih = iph;
> +		return true;
> +	}
> +
> +err:
> +	return false;
> +}
> +
> +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
> +static inline
> +bool ip_set_get_icmpv6_inner_hdr(const struct sk_buff *skb,
> +				 unsigned int *offset,
> +				 struct ipv6hdr **ih)
> +{
> +	u8 type;
> +	const struct icmp6hdr *ic;
> +	struct icmp6hdr _icmp6h;
> +	struct ipv6hdr _ip6h;
> +	struct ipv6hdr *iph;
> +
> +	if (offset == NULL || ih == NULL)
> +		goto err;
> +
> +	ic = skb_header_pointer(skb, *offset, sizeof(_icmp6h), &_icmp6h);
> +	if (ic == NULL)
> +		goto err;
> +
> +	type = ic->icmp6_type;
> +	if (type == ICMPV6_DEST_UNREACH ||
> +	    type == ICMPV6_PKT_TOOBIG ||
> +	    type == ICMPV6_TIME_EXCEED) {
> +		*offset += sizeof(_icmp6h);
> +		iph = skb_header_pointer(skb, *offset, sizeof(_ip6h), &_ip6h);
> +		if (iph == NULL)
> +			goto err;
> +
> +		*ih = iph;
> +		return true;
> +	}
> +
> +err:
> +	return false;
> +}
> +#else
> +static inline
> +bool ip_set_get_icmpv6_inner_hdr(const struct sk_buff *skb,
> +				 unsigned int offset,
> +				 struct ipv6hdr **ih)
> +{
> +	return false;
> +}
> +#endif
> +
> +#endif /*_IP_SET_ICMP_H*/

Move the functions into ip_set_core.c as non-inline ones and export them.

> diff --git a/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
> b/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
> index 8024cdf..e9e6586 100644
> --- a/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
> +++ b/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
> @@ -161,6 +161,8 @@ enum ipset_cmd_flags {
>  		(1 << IPSET_FLAG_BIT_SKIP_SUBCOUNTER_UPDATE),
>  	IPSET_FLAG_BIT_MATCH_COUNTERS = 5,
>  	IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
> +	IPSET_FLAG_BIT_INNER = 6,
> +	IPSET_FLAG_INNER = (1 << IPSET_FLAG_BIT_INNER),
>  	IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
>  	IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
>  	IPSET_FLAG_CMD_MAX = 15,
> diff --git a/kernel/net/netfilter/ipset/ip_set_getport.c
> b/kernel/net/netfilter/ipset/ip_set_getport.c
> index a0d96eb..73ccfb3 100644
> --- a/kernel/net/netfilter/ipset/ip_set_getport.c
> +++ b/kernel/net/netfilter/ipset/ip_set_getport.c
> @@ -19,7 +19,9 @@
>  #include <linux/netfilter_ipv6/ip6_tables.h>
>  #include <net/ip.h>
>  #include <net/ipv6.h>
> +#include <net/icmp.h>
> 
> +#include <linux/netfilter/ipset/ip_set_icmp.h>
>  #include <linux/netfilter/ipset/ip_set_getport.h>
> 
>  /* We must handle non-linear skbs */
> @@ -97,10 +99,10 @@ get_port(const struct sk_buff *skb, int protocol, unsigned
> int protooff,
>  }
> 
>  bool
> -ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
> -		    __be16 *port, u8 *proto)
> +ip_set_get_ipv4_port(const struct sk_buff *skb, bool inner, bool src,
> +		     __be16 *port, u8 *proto)
>  {
> -	const struct iphdr *iph = ip_hdr(skb);
> +	struct iphdr *iph = ip_hdr(skb);
>  	unsigned int protooff = ip_hdrlen(skb);
>  	int protocol = iph->protocol;
> 
> @@ -116,54 +118,93 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
>  		case IPPROTO_UDPLITE:
>  		case IPPROTO_ICMP:
>  			/* Port info not available for fragment offset > 0 */
> -			return false;
> +			goto err;
>  		default:
>  			/* Other protocols doesn't have ports,
>  			   so we can match fragments */
>  			*proto = protocol;
>  			return true;
>  		}
> +	if (inner) {
> +		if (protocol != IPPROTO_ICMP ||
> +		    !ip_set_get_icmpv4_inner_hdr(skb, &protooff, &iph))
> +			goto err;
> 
> +		protocol = iph->protocol;
> +		protooff += iph->ihl*4;
> +	}
>  	return get_port(skb, protocol, protooff, src, port, proto);
> +
> +err:
> +	return false;

If there's nothing else in the error path just a return, then the goto 
is unnecessary.

> +}
> +EXPORT_SYMBOL_GPL(ip_set_get_ipv4_port);
> +
> +bool
> +ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
> +		    __be16 *port, u8 *proto)
> +{
> +	return ip_set_get_ipv4_port(skb, false, src, port, proto);
>  }
>  EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);
> 
>  #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
>  bool
> -ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
> -		    __be16 *port, u8 *proto)
> +ip_set_get_ipv6_port(const struct sk_buff *skb, bool inner, bool src,
> +		     __be16 *port, u8 *proto)
>  {
> -	int protoff;
> +	unsigned int protooff;
>  	u8 nexthdr;
>  	__be16 frag_off = 0;
> 
>  	nexthdr = ipv6_hdr(skb)->nexthdr;
>  #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
> -	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
> +	protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
>  				   &frag_off);
>  #else
> -	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
> +	protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
>  #endif
> -	if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
> -		return false;
> +	if (protooff < 0 || (frag_off & htons(~0x7)) != 0)
> +		goto err;
> +
> +	if (inner) {
> +		struct ipv6hdr *ih;
> +		if (nexthdr != IPPROTO_ICMPV6 ||
> +		    !ip_set_get_icmpv6_inner_hdr(skb, &protooff, &ih))
> +			goto err;
> 
> -	return get_port(skb, nexthdr, protoff, src, port, proto);
> +		nexthdr = ih->nexthdr;
> +		protooff += sizeof(struct ipv6hdr);
> +	}
> +	return get_port(skb, nexthdr, protooff, src, port, proto);
> +
> +err:
> +	return false;
> +}
> +EXPORT_SYMBOL_GPL(ip_set_get_ipv6_port);
> +
> +bool
> +ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
> +		    __be16 *port, u8 *proto)
> +{
> +	return ip_set_get_ipv6_port(skb, false, src, port, proto);
>  }
>  EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
>  #endif
> 
>  bool
> -ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
> +ip_set_get_ipv_port(const struct sk_buff *skb, u8 pf, bool inner, bool src,
> +		    __be16 *port)
>  {
>  	bool ret;
>  	u8 proto;
> 
>  	switch (pf) {
>  	case NFPROTO_IPV4:
> -		ret = ip_set_get_ip4_port(skb, src, port, &proto);
> +		ret = ip_set_get_ipv4_port(skb, inner, src, port, &proto);
>  		break;
>  	case NFPROTO_IPV6:
> -		ret = ip_set_get_ip6_port(skb, src, port, &proto);
> +		ret = ip_set_get_ipv6_port(skb, inner, src, port, &proto);
>  		break;
>  	default:
>  		return false;
> @@ -178,4 +219,11 @@ ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool
> src, __be16 *port)
>  		return false;
>  	}
>  }
> +EXPORT_SYMBOL_GPL(ip_set_get_ipv_port);
> +
> +bool
> +ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
> +{
> +	return ip_set_get_ipv_port(skb, pf, false, src, port);
> +}
>  EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
> 

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mr Dash Four - June 27, 2013, 10:36 p.m.
Jozsef Kadlecsik wrote:
> Get rid of the compatibility functions. Or keep the original function 
> names with the extended arguments: there's no stable API.
>   
As you've probably guessed, I kept the old functions and their 
parameters in order to preserve the existing interface API, since any 
changes I make to these will break existing code using them. If there is 
no objection and there is no such requirement, I'll get rid of them in 
the next release of the patches - just let me know.

>> +#include <linux/netfilter/ipset/ip_set_icmp.h>
>>     
>
> I don't really like the new header file. ip[v]4|6addrptr are generic 
> functions (from ipset point of view) and getting the inner header is too, 
> so those deserve to be in the ipset core.
>   
That's a fair comment - I'll move them in ip_set_core.c as you suggest 
below.

>>  #define _IP_SET_MODULE_DESC(a, b, c)		\
>>  	MODULE_DESCRIPTION(a " type of IP sets, revisions " b "-" c)
>> @@ -361,6 +365,25 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb,
>> int type,
>>  }
>>
>>  /* Get address from skbuff */
>> +static inline bool
>> +ipv4addrptr(const struct sk_buff *skb, bool inner, bool src, __be32 *addr)
>> +{
>> +	struct iphdr *ih = ip_hdr(skb);
>> +	unsigned int protooff = ip_hdrlen(skb);
>> +
>> +	if (ih == NULL ||
>>     
>
> Why do you have to check the IP header?
>   
Simply playing it safe. I am referencing it later on in the code and if 
that pointer is NULL for whatever reason, it will trigger a null-pointer 
dereference, hence the check above.

>> +	    (inner &&
>> +	     (ih->protocol != IPPROTO_ICMP ||
>> +	      !ip_set_get_icmpv4_inner_hdr(skb, &protooff, &ih))))
>> +			goto err;
>>     
>
> Move the protocol checking into ip_set_get_icmpv4_inner_hdr. Also, I 
> suggested the name ip_set_get[_ip4]_inner_hdr because I want to extend it 
> to support IPIP and GRE too. By moving the protocol checking into the 
> function and using a not so specific name, those make possible the 
> extension straightforward.
Noted - will do that in the next version of the patches.

>  Also, there's no need for the goto here: 
> there's no other error path.
>   
I disagree. By having "return false" (or "return 0", "return -1" and so 
on) instead of "goto err" it is not immediately apparent to someone who 
studies/reviews/uses the code that this is an error condition/path. I 
have been in that situation many times when I have to go back and look 
at a particular function call to see what "return false" or "return 0" 
actually means.

By including "goto err" instead of multiple "return false" statement, 
that makes it instantly obvious what the purpose of that statement is 
without having to look elsewhere.

>> +	*addr = src ? ih->saddr : ih->daddr;
>> +	return true;
>> +
>> +err:
>> +	return false;
>> +}
>> +
>>  static inline __be32
>>  ip4addr(const struct sk_buff *skb, bool src)
>>  {
>> @@ -373,12 +396,55 @@ ip4addrptr(const struct sk_buff *skb, bool src, __be32
>> *addr)
>>  	*addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
>>  }
>>
>> +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
>> +static inline bool
>> +ipv6addrptr(const struct sk_buff *skb, bool inner, bool src,
>> +	    struct in6_addr *addr)
>> +{
>> +	struct ipv6hdr *ih = ipv6_hdr(skb);
>> +
>> +	if (ih == NULL)
>> +		goto err;
>>     
>
> The same comment as above.
>   
Same answer - it is simply used for code clarity and makes the code 
easier to understand. The presence of "goto" instead of multiple "return 
false" (or "return 0") statements is also negligible in terms of 
performance impact.

>> +	if (inner) {
>> +		unsigned int protooff;
>> +		u8 nexthdr = ih->nexthdr;
>> +		__be16 frag_off;
>> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
>> +		protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
>> +					    &nexthdr, &frag_off);
>> +#else
>> +		protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
>> +					    &nexthdr);
>> +#endif
>> +		if (protooff < 0 || nexthdr != IPPROTO_ICMPV6 ||
>> +		    !ip_set_get_icmpv6_inner_hdr(skb, &protooff, &ih))
>> +			goto err;
>> +	}
>>     
>
> Move the whole code in the braces into ip_set_get_ipv6_inner_hdr, for the 
> same reason as above.
>   
Noted, will include this in the next version of the patches.

>> +#endif /*_IP_SET_ICMP_H*/
>>     
>
> Move the functions into ip_set_core.c as non-inline ones and export them.
>   
Yep, will do that in the next submission.

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Haran - June 27, 2013, 10:45 p.m.
> -----Original Message-----
> From: netfilter-devel-owner@vger.kernel.org [mailto:netfilter-devel-owner@vger.kernel.org] On Behalf Of Dash Four
> Sent: Thursday, June 27, 2013 3:37 PM
> To: Jozsef Kadlecsik
> Cc: Pablo Neira Ayuso; Netfilter Core Team
> Subject: Re: [PATCH v2 2/5] ipset: add "inner" flag implementation
> 
...
> I disagree. By having "return false" (or "return 0", "return -1" and so
> on) instead of "goto err" it is not immediately apparent to someone who
> studies/reviews/uses the code that this is an error condition/path. I
> have been in that situation many times when I have to go back and look
> at a particular function call to see what "return false" or "return 0"
> actually means.
> 
> By including "goto err" instead of multiple "return false" statement,
> that makes it instantly obvious what the purpose of that statement is
> without having to look elsewhere.

I suppose as an alternative you could go way against the usual practice and put some text in a function header comment block indicating what the return code means. I know it doesn't get used much but C has had this /* comment */ thing for a long time. I've never understood why more people don't use it.

Jeff Haran

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mr Dash Four - June 28, 2013, 8:27 p.m.
Jeff Haran wrote:
> I suppose as an alternative you could go way against the usual practice and put some text in a function header comment block indicating what the return code means. I know it doesn't get used much but C has had this /* comment */ thing for a long time. I've never understood why more people don't use it.
>   
All true. I guess I could go the full hog and document the entire 
function, then use "return false" as Jozsef suggested, but this will 
stick out as a sore thumb as it will be the only function documented in 
the entire module.

Jozsef, I don't mind doing either of the suggested alternatives: keep 
"goto err" or document the entire function and use "return false" - let 
me know what your preference is.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik - June 29, 2013, 11:07 a.m.
On Thu, 27 Jun 2013, Dash Four wrote:

> Jozsef Kadlecsik wrote:
> > Get rid of the compatibility functions. Or keep the original function names
> > with the extended arguments: there's no stable API.
> >   
> As you've probably guessed, I kept the old functions and their parameters in
> order to preserve the existing interface API, since any changes I make to
> these will break existing code using them. If there is no objection and there
> is no such requirement, I'll get rid of them in the next release of the
> patches - just let me know.

Yes, there's no such requirement and remove the backward compatibility 
part.
 
> > >  #define _IP_SET_MODULE_DESC(a, b, c)		\
> > >  	MODULE_DESCRIPTION(a " type of IP sets, revisions " b "-" c)
> > > @@ -361,6 +365,25 @@ static inline int nla_put_ipaddr6(struct sk_buff
> > > *skb,
> > > int type,
> > >  }
> > > 
> > >  /* Get address from skbuff */
> > > +static inline bool
> > > +ipv4addrptr(const struct sk_buff *skb, bool inner, bool src, __be32
> > > *addr)
> > > +{
> > > +	struct iphdr *ih = ip_hdr(skb);
> > > +	unsigned int protooff = ip_hdrlen(skb);
> > > +
> > > +	if (ih == NULL ||
> > >     
> > 
> > Why do you have to check the IP header?
> >   
> Simply playing it safe. I am referencing it later on in the code and if that
> pointer is NULL for whatever reason, it will trigger a null-pointer
> dereference, hence the check above.

The return value of ip[6]_hdr was never checked in ipset code yet, because 
it doesn't return NULL at the point where these code parts are called. 
It's an unnecessary checking.
 
> > > +	    (inner &&
> > > +	     (ih->protocol != IPPROTO_ICMP ||
> > > +	      !ip_set_get_icmpv4_inner_hdr(skb, &protooff, &ih))))
> > > +			goto err;
> > > 
> 
> >  Also, there's no need for the goto here: there's no other error path.
> >   
> I disagree. By having "return false" (or "return 0", "return -1" and so on)
> instead of "goto err" it is not immediately apparent to someone who
> studies/reviews/uses the code that this is an error condition/path. I have
> been in that situation many times when I have to go back and look at a
> particular function call to see what "return false" or "return 0" actually
> means.
> 
> By including "goto err" instead of multiple "return false" statement, that
> makes it instantly obvious what the purpose of that statement is without
> having to look elsewhere.

I see the point, to self-document the code, with the price of more lines. 
It's a little bit overdoing in my opinion: pretty apparent which is the 
error path and which is not. But this is highly a personal taste, I won't 
press it.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik - June 29, 2013, 11:10 a.m.
On Thu, 27 Jun 2013, Jeff Haran wrote:

> ...
> > I disagree. By having "return false" (or "return 0", "return -1" and so
> > on) instead of "goto err" it is not immediately apparent to someone who
> > studies/reviews/uses the code that this is an error condition/path. I
> > have been in that situation many times when I have to go back and look
> > at a particular function call to see what "return false" or "return 0"
> > actually means.
> > 
> > By including "goto err" instead of multiple "return false" statement,
> > that makes it instantly obvious what the purpose of that statement is
> > without having to look elsewhere.
> 
> I suppose as an alternative you could go way against the usual practice 
> and put some text in a function header comment block indicating what the 
> return code means. I know it doesn't get used much but C has had this /* 
> comment */ thing for a long time. I've never understood why more people 
> don't use it.

In general I'd agree with you, but this is a boolean function. So why 
should the the meaning of the return value "true" and "false" be 
explained? (This is also why I regard the goto unnecessary.)

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mr Dash Four - June 29, 2013, 2:05 p.m.
Jozsef Kadlecsik wrote:
>> As you've probably guessed, I kept the old functions and their parameters in
>> order to preserve the existing interface API, since any changes I make to
>> these will break existing code using them. If there is no objection and there
>> is no such requirement, I'll get rid of them in the next release of the
>> patches - just let me know.
>>     
>
> Yes, there's no such requirement and remove the backward compatibility 
> part.
>   
OK, I'll remove it then.

> The return value of ip[6]_hdr was never checked in ipset code yet, because 
> it doesn't return NULL at the point where these code parts are called. 
> It's an unnecessary checking.
>   
Fair enough - I'll remove that check too.

>> I disagree. By having "return false" (or "return 0", "return -1" and so on)
>> instead of "goto err" it is not immediately apparent to someone who
>> studies/reviews/uses the code that this is an error condition/path. I have
>> been in that situation many times when I have to go back and look at a
>> particular function call to see what "return false" or "return 0" actually
>> means.
>>
>> By including "goto err" instead of multiple "return false" statement, that
>> makes it instantly obvious what the purpose of that statement is without
>> having to look elsewhere.
>>     
>
> I see the point, to self-document the code, with the price of more lines. 
> It's a little bit overdoing in my opinion: pretty apparent which is the 
> error path and which is not. But this is highly a personal taste, I won't 
> press it.
>   
In other words, you'll be happy for me to leave things as they are - 
with the "goto"?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jozsef Kadlecsik - June 29, 2013, 6:13 p.m.
On Sat, 29 Jun 2013, Dash Four wrote:

> > > By including "goto err" instead of multiple "return false" statement, that
> > > makes it instantly obvious what the purpose of that statement is without
> > > having to look elsewhere. 
> > 
> > I see the point, to self-document the code, with the price of more lines.
> > It's a little bit overdoing in my opinion: pretty apparent which is the
> > error path and which is not. But this is highly a personal taste, I won't
> > press it.
> >   
> In other words, you'll be happy for me to leave things as they are - 
> with the "goto"?

I won't be happy but I can stomach it. So you can leave the gotos there as
you are conviced those add a readability value.

Best regards,
Jozsef
-
E-mail  : kadlec@blackhole.kfki.hu, kadlecsik.jozsef@wigner.mta.hu
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
          H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Haran - July 1, 2013, 5:06 p.m.
> -----Original Message-----
> From: Jozsef Kadlecsik [mailto:kadlec@blackhole.kfki.hu]
> Sent: Saturday, June 29, 2013 4:10 AM
> To: Jeff Haran
> Cc: Dash Four; Pablo Neira Ayuso; Netfilter Core Team
> Subject: RE: [PATCH v2 2/5] ipset: add "inner" flag implementation
> 
> On Thu, 27 Jun 2013, Jeff Haran wrote:
> 
> > ...
> > > I disagree. By having "return false" (or "return 0", "return -1" and so
> > > on) instead of "goto err" it is not immediately apparent to someone who
> > > studies/reviews/uses the code that this is an error condition/path. I
> > > have been in that situation many times when I have to go back and look
> > > at a particular function call to see what "return false" or "return 0"
> > > actually means.
> > >
> > > By including "goto err" instead of multiple "return false" statement,
> > > that makes it instantly obvious what the purpose of that statement is
> > > without having to look elsewhere.
> >
> > I suppose as an alternative you could go way against the usual practice
> > and put some text in a function header comment block indicating what the
> > return code means. I know it doesn't get used much but C has had this /*
> > comment */ thing for a long time. I've never understood why more people
> > don't use it.
> 
> In general I'd agree with you, but this is a boolean function. So why
> should the the meaning of the return value "true" and "false" be
> explained? (This is also why I regard the goto unnecessary.)
> 
I was just pointing out that if there was some concern by the author of this code that the intent of the function would not be clear to other readers, instead of "commenting" it via a coding standard (using the goto) it would be easier and clearer to simply write a function header comment describing it. In this particular case where the return code is a bool and the function is setting something, then it's pretty clear that if it returns true it worked and if it returns false it didn't, but the author described other cases where he had to research "what 'return false' or 'return 0' actually means in other kernel sources and he didn't want to create the same issue for others reading his code. I personally can sympathize with that, I've often been in the same boat trying to understand what the author of some piece of code had intended by a return of 0. Comments are easy to write and they remove ambiguity so long as they are kept up to date. They also serve to document what an author intended the code to do, which is not always what it actually does.

Jeff Haran

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/kernel/include/linux/netfilter/ipset/ip_set.h b/kernel/include/linux/netfilter/ipset/ip_set.h
index 8499e25..5b6ef72 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set.h
@@ -17,9 +17,13 @@ 
  #include <linux/netfilter/x_tables.h>
  #include <linux/stringify.h>
  #include <linux/vmalloc.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
  #include <net/netlink.h>
  #include <uapi/linux/netfilter/ipset/ip_set.h>
  #include <linux/netfilter/ipset/ip_set_compat.h>
+#include <linux/netfilter/ipset/ip_set_icmp.h>

  #define _IP_SET_MODULE_DESC(a, b, c)		\
  	MODULE_DESCRIPTION(a " type of IP sets, revisions " b "-" c)
@@ -361,6 +365,25 @@  static inline int nla_put_ipaddr6(struct sk_buff *skb, int type,
  }

  /* Get address from skbuff */
+static inline bool
+ipv4addrptr(const struct sk_buff *skb, bool inner, bool src, __be32 *addr)
+{
+	struct iphdr *ih = ip_hdr(skb);
+	unsigned int protooff = ip_hdrlen(skb);
+
+	if (ih == NULL ||
+	    (inner &&
+	     (ih->protocol != IPPROTO_ICMP ||
+	      !ip_set_get_icmpv4_inner_hdr(skb, &protooff, &ih))))
+			goto err;
+
+	*addr = src ? ih->saddr : ih->daddr;
+	return true;
+
+err:
+	return false;
+}
+
  static inline __be32
  ip4addr(const struct sk_buff *skb, bool src)
  {
@@ -373,12 +396,55 @@  ip4addrptr(const struct sk_buff *skb, bool src, __be32 *addr)
  	*addr = src ? ip_hdr(skb)->saddr : ip_hdr(skb)->daddr;
  }

+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static inline bool
+ipv6addrptr(const struct sk_buff *skb, bool inner, bool src,
+	    struct in6_addr *addr)
+{
+	struct ipv6hdr *ih = ipv6_hdr(skb);
+
+	if (ih == NULL)
+		goto err;
+
+	if (inner) {
+		unsigned int protooff;
+		u8 nexthdr = ih->nexthdr;
+		__be16 frag_off;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
+		protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+					    &nexthdr, &frag_off);
+#else
+		protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+					    &nexthdr);
+#endif
+		if (protooff < 0 || nexthdr != IPPROTO_ICMPV6 ||
+		    !ip_set_get_icmpv6_inner_hdr(skb, &protooff, &ih))
+			goto err;
+	}
+	memcpy(addr, src ? &ih->saddr : &ih->daddr, sizeof(*addr));
+	return true;
+
+err:
+	return false;
+}
+
  static inline void
  ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
  {
  	memcpy(addr, src ? &ipv6_hdr(skb)->saddr : &ipv6_hdr(skb)->daddr,
  	       sizeof(*addr));
  }
+#else
+static inline bool
+ipv6addrptr(const struct sk_buff *skb, bool inner, bool src,
+	    struct in6_addr *addr)
+{
+	return false;
+}
+
+static inline void
+ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) { ; }
+#endif

  /* Calculate the bytes required to store the inclusive range of a-b */
  static inline int
diff --git a/kernel/include/linux/netfilter/ipset/ip_set_getport.h b/kernel/include/linux/netfilter/ipset/ip_set_getport.h
index 90d0930..8cdc177 100644
--- a/kernel/include/linux/netfilter/ipset/ip_set_getport.h
+++ b/kernel/include/linux/netfilter/ipset/ip_set_getport.h
@@ -1,13 +1,26 @@ 
  #ifndef _IP_SET_GETPORT_H
  #define _IP_SET_GETPORT_H

+extern bool ip_set_get_ipv4_port(const struct sk_buff *skb, bool inner,
+				 bool src, __be16 *port, u8 *proto);
+
  extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
  				__be16 *port, u8 *proto);

  #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+extern bool ip_set_get_ipv6_port(const struct sk_buff *skb, bool inner,
+				 bool src, __be16 *port, u8 *proto);
+
  extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
  				__be16 *port, u8 *proto);
  #else
+static inline bool ip_set_get_ipv6_port(const struct sk_buff *skb,
+					bool inner, bool src,
+					__be16 *port, u8 *proto)
+{
+	return false;
+}
+
  static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
  				       __be16 *port, u8 *proto)
  {
@@ -15,6 +28,9 @@  static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
  }
  #endif

+extern bool ip_set_get_ipv_port(const struct sk_buff *skb, u8 pf, bool inner,
+				bool src, __be16 *port);
+
  extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
  				__be16 *port);

diff --git a/kernel/include/linux/netfilter/ipset/ip_set_icmp.h b/kernel/include/linux/netfilter/ipset/ip_set_icmp.h
new file mode 100644
index 0000000..e116d28
--- /dev/null
+++ b/kernel/include/linux/netfilter/ipset/ip_set_icmp.h
@@ -0,0 +1,91 @@ 
+#ifndef _IP_SET_ICMP_H
+#define _IP_SET_ICMP_H
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+
+static inline
+bool ip_set_get_icmpv4_inner_hdr(const struct sk_buff *skb,
+				 unsigned int *offset,
+				 struct iphdr **ih)
+{
+	u8 type;
+	struct iphdr _iph;
+	struct icmphdr _icmph;
+	struct iphdr *iph;
+	const struct icmphdr *ich;
+	/* RFC 1122: 3.2.2: req'd len: IP header + 8 bytes of inner header */
+	static const size_t req_len = sizeof(struct iphdr) + 8;
+
+	if (offset == NULL || ih == NULL)
+		goto err;
+
+	ich = skb_header_pointer(skb, *offset, sizeof(_icmph), &_icmph);
+	if (ich == NULL ||
+	    (ich->type <= NR_ICMP_TYPES && skb->len - *offset < req_len))
+		goto err;
+
+	type = ich->type;
+	if (type == ICMP_DEST_UNREACH ||
+	    type == ICMP_SOURCE_QUENCH ||
+	    type == ICMP_TIME_EXCEEDED) {
+		*offset += sizeof(_icmph);
+		iph = skb_header_pointer(skb, *offset, sizeof(_iph), &_iph);
+		if (iph == NULL || ntohs(iph->frag_off) & IP_OFFSET)
+			goto err;
+
+		*ih = iph;
+		return true;
+	}
+
+err:
+	return false;
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static inline
+bool ip_set_get_icmpv6_inner_hdr(const struct sk_buff *skb,
+				 unsigned int *offset,
+				 struct ipv6hdr **ih)
+{
+	u8 type;
+	const struct icmp6hdr *ic;
+	struct icmp6hdr _icmp6h;
+	struct ipv6hdr _ip6h;
+	struct ipv6hdr *iph;
+
+	if (offset == NULL || ih == NULL)
+		goto err;
+
+	ic = skb_header_pointer(skb, *offset, sizeof(_icmp6h), &_icmp6h);
+	if (ic == NULL)
+		goto err;
+
+	type = ic->icmp6_type;
+	if (type == ICMPV6_DEST_UNREACH ||
+	    type == ICMPV6_PKT_TOOBIG ||
+	    type == ICMPV6_TIME_EXCEED) {
+		*offset += sizeof(_icmp6h);
+		iph = skb_header_pointer(skb, *offset, sizeof(_ip6h), &_ip6h);
+		if (iph == NULL)
+			goto err;
+
+		*ih = iph;
+		return true;
+	}
+
+err:
+	return false;
+}
+#else
+static inline
+bool ip_set_get_icmpv6_inner_hdr(const struct sk_buff *skb,
+				 unsigned int offset,
+				 struct ipv6hdr **ih)
+{
+	return false;
+}
+#endif
+
+#endif /*_IP_SET_ICMP_H*/
diff --git a/kernel/include/uapi/linux/netfilter/ipset/ip_set.h b/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
index 8024cdf..e9e6586 100644
--- a/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/kernel/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -161,6 +161,8 @@  enum ipset_cmd_flags {
  		(1 << IPSET_FLAG_BIT_SKIP_SUBCOUNTER_UPDATE),
  	IPSET_FLAG_BIT_MATCH_COUNTERS = 5,
  	IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
+	IPSET_FLAG_BIT_INNER = 6,
+	IPSET_FLAG_INNER = (1 << IPSET_FLAG_BIT_INNER),
  	IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
  	IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
  	IPSET_FLAG_CMD_MAX = 15,
diff --git a/kernel/net/netfilter/ipset/ip_set_getport.c b/kernel/net/netfilter/ipset/ip_set_getport.c
index a0d96eb..73ccfb3 100644
--- a/kernel/net/netfilter/ipset/ip_set_getport.c
+++ b/kernel/net/netfilter/ipset/ip_set_getport.c
@@ -19,7 +19,9 @@ 
  #include <linux/netfilter_ipv6/ip6_tables.h>
  #include <net/ip.h>
  #include <net/ipv6.h>
+#include <net/icmp.h>

+#include <linux/netfilter/ipset/ip_set_icmp.h>
  #include <linux/netfilter/ipset/ip_set_getport.h>

  /* We must handle non-linear skbs */
@@ -97,10 +99,10 @@  get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
  }

  bool
-ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
-		    __be16 *port, u8 *proto)
+ip_set_get_ipv4_port(const struct sk_buff *skb, bool inner, bool src,
+		     __be16 *port, u8 *proto)
  {
-	const struct iphdr *iph = ip_hdr(skb);
+	struct iphdr *iph = ip_hdr(skb);
  	unsigned int protooff = ip_hdrlen(skb);
  	int protocol = iph->protocol;

@@ -116,54 +118,93 @@  ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
  		case IPPROTO_UDPLITE:
  		case IPPROTO_ICMP:
  			/* Port info not available for fragment offset > 0 */
-			return false;
+			goto err;
  		default:
  			/* Other protocols doesn't have ports,
  			   so we can match fragments */
  			*proto = protocol;
  			return true;
  		}
+	if (inner) {
+		if (protocol != IPPROTO_ICMP ||
+		    !ip_set_get_icmpv4_inner_hdr(skb, &protooff, &iph))
+			goto err;

+		protocol = iph->protocol;
+		protooff += iph->ihl*4;
+	}
  	return get_port(skb, protocol, protooff, src, port, proto);
+
+err:
+	return false;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ipv4_port);
+
+bool
+ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
+		    __be16 *port, u8 *proto)
+{
+	return ip_set_get_ipv4_port(skb, false, src, port, proto);
  }
  EXPORT_SYMBOL_GPL(ip_set_get_ip4_port);

  #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
  bool
-ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
-		    __be16 *port, u8 *proto)
+ip_set_get_ipv6_port(const struct sk_buff *skb, bool inner, bool src,
+		     __be16 *port, u8 *proto)
  {
-	int protoff;
+	unsigned int protooff;
  	u8 nexthdr;
  	__be16 frag_off = 0;

  	nexthdr = ipv6_hdr(skb)->nexthdr;
  #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0)
-	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+	protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
  				   &frag_off);
  #else
-	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
+	protooff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
  #endif
-	if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
-		return false;
+	if (protooff < 0 || (frag_off & htons(~0x7)) != 0)
+		goto err;
+
+	if (inner) {
+		struct ipv6hdr *ih;
+		if (nexthdr != IPPROTO_ICMPV6 ||
+		    !ip_set_get_icmpv6_inner_hdr(skb, &protooff, &ih))
+			goto err;

-	return get_port(skb, nexthdr, protoff, src, port, proto);
+		nexthdr = ih->nexthdr;
+		protooff += sizeof(struct ipv6hdr);
+	}
+	return get_port(skb, nexthdr, protooff, src, port, proto);
+
+err:
+	return false;
+}
+EXPORT_SYMBOL_GPL(ip_set_get_ipv6_port);
+
+bool
+ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
+		    __be16 *port, u8 *proto)
+{
+	return ip_set_get_ipv6_port(skb, false, src, port, proto);
  }
  EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
  #endif

  bool
-ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+ip_set_get_ipv_port(const struct sk_buff *skb, u8 pf, bool inner, bool src,
+		    __be16 *port)
  {
  	bool ret;
  	u8 proto;

  	switch (pf) {
  	case NFPROTO_IPV4:
-		ret = ip_set_get_ip4_port(skb, src, port, &proto);
+		ret = ip_set_get_ipv4_port(skb, inner, src, port, &proto);
  		break;
  	case NFPROTO_IPV6:
-		ret = ip_set_get_ip6_port(skb, src, port, &proto);
+		ret = ip_set_get_ipv6_port(skb, inner, src, port, &proto);
  		break;
  	default:
  		return false;
@@ -178,4 +219,11 @@  ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
  		return false;
  	}
  }
+EXPORT_SYMBOL_GPL(ip_set_get_ipv_port);
+
+bool
+ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
+{
+	return ip_set_get_ipv_port(skb, pf, false, src, port);
+}
  EXPORT_SYMBOL_GPL(ip_set_get_ip_port);