diff mbox

[PATCHv3,net-next-2.6,3/5] XFRM,IPv6: Add IRO src/dst address remapping XFRM types and i/o handlers

Message ID fd4eec3c9486c46b535e89ceed479c7536f51fb9.1285749610.git.arno@natisbad.org
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Arnaud Ebalard Sept. 29, 2010, 9:05 a.m. UTC
Add IRO source and destination remapping XFRM types and associated
input/output handlers. This allows userland to install such states
in order to support remapping of source or destination address
of packet. They basically work like existing RH2 and HAO ones; the
main difference is that output handlers do not expand the packet by
adding an extension header: they simply change the source or
destination in place. Input handlers are almost the same as RH2/HAO
version in their behavior, but they are triggered differently. RH2
and HAO handlers are triggered based on structures found in the
packet. On input, IRO states (and associated handlers) are looked
up when processing an IPsec-protected packet, when there is an
address mismatch.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
---
 include/net/xfrm.h       |    2 +
 net/ipv6/mip6.c          |  153 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/xfrm6_mode_ro.c |   11 +++-
 net/xfrm/xfrm_user.c     |    4 +
 4 files changed, 169 insertions(+), 1 deletions(-)

Comments

David Miller Sept. 30, 2010, 3:16 a.m. UTC | #1
From: Arnaud Ebalard <arno@natisbad.org>
Date: Wed, 29 Sep 2010 11:05:47 +0200

> +static int mip6_iro_src_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
> +{
> +	int err = 0;
> +
> +	/* XXX We may need some reject handler at some point but it is not
> +	 * critical yet: see xfrm_secpath_reject() in net/xfrm/xfrm_policy.c
> +	 * and aslo what mip6_destopt_reject() implements */
> +
> +	printk("XXX FIXME: mip6_iro_src_reject() called\n");

pr_debug() or pr_err() or get rid of it altogether and use WARN_ON() or
similar.

> +	spin_lock(&x->lock);
> +	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
> +	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
> +		err = -ENOENT;
> +	spin_unlock(&x->lock);

What are you actually protecting with this lock?  The moment you drop
it the x->coaddr can change which changes the result you should return
here.

I suspect you either don't need the lock, or you need to lock at a higher
level.

> +		printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,

pr_info()

> +		printk(KERN_INFO "%s: state's mode is not %u: %u\n",

pr_info()

> +		       __func__, XFRM_MODE_ROUTEOPTIMIZATION,

Printing decimal values for CPP macro constants does not make log
messages very readable.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnaud Ebalard Oct. 2, 2010, 10:17 a.m. UTC | #2
Hi,

David Miller <davem@davemloft.net> writes:

>> +static int mip6_iro_src_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
>> +{
>> +	int err = 0;
>> +
>> +	/* XXX We may need some reject handler at some point but it is not
>> +	 * critical yet: see xfrm_secpath_reject() in net/xfrm/xfrm_policy.c
>> +	 * and aslo what mip6_destopt_reject() implements */
>> +
>> +	printk("XXX FIXME: mip6_iro_src_reject() called\n");
>
> pr_debug() or pr_err() or get rid of it altogher and use WARN_ON() or
> similar.

I will take a look at this reject handler tomorrow (implement or remove it).


>> +	spin_lock(&x->lock);
>> +	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
>> +	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
>> +		err = -ENOENT;
>> +	spin_unlock(&x->lock);
>
> What are you actually protecting with this lock?  The moment you drop
> it the x->coaddr can change which changes the result you should return
> here.
>
> I suspect you either don't need the lock, or you need to lock at a higher
> level.

I basically trusted RH2 input handler code and reused it as a basis:

  static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
  {
  	struct ipv6hdr *iph = ipv6_hdr(skb);
  	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
  	int err = rt2->rt_hdr.nexthdr;
  
  	spin_lock(&x->lock);
  	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
  	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
  		err = -ENOENT;
  	spin_unlock(&x->lock);
  
  	return err;
  }

*At that time*, I considered the lock useful to prevent changes on coaddr
during the two checks, i.e. to make it coherent. But I think you are
right and I see no reason for the lock not to be at a higher level:
I may have missed something but AFAICT, from a look at the code, there is
nothing preventing x->coaddr from being updated (via xfrm_sa_update()) just
before or just after the checks.

I took a look at the callers for mip6 handlers and if I am not mistaken
there is *only* xfrm6_input_addr() because xfrm_input() only handles
esp, ah and ipcomp extension headers and not mip6-related ones
(i.e. only IPsec-related ones, those with a SPI). Here is a snippet of
the interesting (lock-wise) part of xfrm6_input_addr():

>	for (i = 0; i < 3; i++) {
>
>               <....snip....>
>
> 		spin_lock(&x->lock);
> 
> 		if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
> 		    likely(x->km.state == XFRM_STATE_VALID) &&
> 		    !xfrm_state_check_expire(x)) {
> 			spin_unlock(&x->lock);
> 			if (x->type->input(x, skb) > 0) {
> 				/* found a valid state */
> 				break;
> 			}
> 		} else
> 			spin_unlock(&x->lock);
> 
> 		xfrm_state_put(x);
> 		x = NULL;
> 	}
> 
> 	if (!x) {
> 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
> 		xfrm_audit_state_notfound_simple(skb, AF_INET6);
> 		goto drop;
> 	}
> 
> 	skb->sp->xvec[skb->sp->len++] = x;
> 
> 	spin_lock(&x->lock);
> 
> 	x->curlft.bytes += skb->len;
> 	x->curlft.packets++;
> 
> 	spin_unlock(&x->lock);

and I see no reason not to keep the lock we have on the state until the
end of the function when the state is valid (when we break), instead of
releasing it to get it again later. Something like the following would
allow removing the spin_lock()/spin_unlock() calls from all mip6 input
handlers (mip6_{destopt,rthdr,iro_src,iro_dst}_input()):

> 		spin_lock(&x->lock);
>
> 		if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
> 		    likely(x->km.state == XFRM_STATE_VALID) &&
> 		    !xfrm_state_check_expire(x)) {
> 			if (x->type->input(x, skb) > 0) {
> 				/* found a valid state */
> 				break;
> 			} 
> 		}
>
> 		spin_unlock(&x->lock);
>
> 		xfrm_state_put(x);
> 		x = NULL;
> 	}
> 
> 	if (!x) {
> 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
> 		xfrm_audit_state_notfound_simple(skb, AF_INET6);
> 		goto drop;
> 	}
> 
> 	skb->sp->xvec[skb->sp->len++] = x;
> 
> 	x->curlft.bytes += skb->len;
> 	x->curlft.packets++;
> 
> 	spin_unlock(&x->lock);

If this is ok, I will add a patch to the set to do that and also remove
the locks from the input handlers I introduce.

Cheers,

a+
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu Oct. 2, 2010, 10:32 a.m. UTC | #3
On Sat, Oct 02, 2010 at 12:17:35PM +0200, Arnaud Ebalard wrote:
>
> and I see no reason not to keep the lock we have on the state until the
> end of the function when the state is valid (when we break), instead of
> releasing it to get it again later. Something like the following would
> allow removing the spin_lock()/spin_unlock() calls from all mip6 input
> handlers (mip6_{destopt,rthdr,iro_src,iro_dst}_input()):

No I moved the state lock down precisely because it should not
be taken at a higher level as that breaks asynchronous IPsec
processing and the fact that it isn't needed in most places.

If your code needs it then you should take it rather than impose
it on real IPsec users.

Cheers,
Arnaud Ebalard Oct. 3, 2010, 1:41 p.m. UTC | #4
Hi Herbert,

Herbert Xu <herbert@gondor.apana.org.au> writes:

> On Sat, Oct 02, 2010 at 12:17:35PM +0200, Arnaud Ebalard wrote:
>>
>> and I see no reason not to keep the lock we have on the state until the
>> end of the function when the state is valid (when we break), instead of
>> releasing it to get it again later. Something like the following would
>> allow removing the spin_lock()/spin_unlock() calls from all mip6 input
>> handlers (mip6_{destopt,rthdr,iro_src,iro_dst}_input()):
>
> No I moved the state lock down precisely because it should not
> be taken at a higher level as that breaks asynchronous IPsec
> processing and the fact that it isn't needed in most places.
>
> If your code needs it then you should take it rather than impose
> it on real IPsec users.

Understood. Note that I am on your side with this: my primary concern
while pushing the feature is *to not break or slow down standard IPsec*.
I do not expect my code to be accepted or even read otherwise.

As for the current point raised by David on the position of the locks in
my input handlers, they are based on the position of the locks in the
*existing* RH2 (mip6_rthdr_input()) and HAO (mip6_destopt_input())
handlers. As they serve the same purpose (src/dst address check against
state's address) and the code is basically the same, I have no reason to
do things differently from what is currently upstream.

After your reply, I took a (too long) look at the history of
xfrm6_input_addr() to understand why it is as it is. If it can spare you
some time, here is what I think happened:

 - Initially (commit fbd9a5b4, Aug 23 2006), the checks on the status of
   state, the call to x->type->input() and the changes on state's
   processing stats (x->curlft changes) were *globally* protected by a
   call to spin_lock(). The same day, a related commit (3d126890) added
   support for RH2/HAO input handler. No lock inside the handler. The
   content of xfrm6_input_addr() was:

		spin_lock(&x->lock);

                <...snip...>

		nh = x->type->input(x, skb);
		if (nh <= 0) {
			spin_unlock(&x->lock);
			xfrm_state_put(x);
			x = NULL;
			continue;
		}

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

 - Then, as you wrote, the state lock was moved in all input handlers
   (commit 0ebea8ef, Nov 13 2007), including RH2/HAO ones:

   @@ -128,12 +128,15 @@ static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
    {
           struct ipv6hdr *iph = ipv6_hdr(skb);
           struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
   +       int err = destopt->nexthdr;
    
   +       spin_lock(&x->lock);
           if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
               !ipv6_addr_any((struct in6_addr *)x->coaddr))
   -               return -ENOENT;
   +               err = -ENOENT;
   +       spin_unlock(&x->lock);
    
   -       return destopt->nexthdr;
   +       return err;
    }

   With that commit, I think a deadlock was introduced in MIPv6 code
   because xfrm6_input_addr() was left unchanged, i.e. x->type->input()
   was called with the lock held. Am I correct?

 - The code of xfrm6_input_addr() was then optimized by commit a002c6fd
   in such a way that x->type->input() was then put outside the
   protection of the lock, which (if I am not mistaken) removed the
   deadlock: 

	spin_lock(&x->lock);

	if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
	    likely(x->km.state == XFRM_STATE_VALID) &&
	    !xfrm_state_check_expire(x)) {
		spin_unlock(&x->lock);
		if (x->type->input(x, skb) > 0) {
			/* found a valid state */
			break;
		}
	} else
		spin_unlock(&x->lock);

   I don't know if this was intentional.

   But the main question remains on the position of the lock. Here,
   checks are done on the status of the state, lock is released,
   reacquired in the input handler to do additional check and then
   released again, to be reacquired later in the function to act on
   statistics. Is my reading of the code correct?


Herbert, you certainly have a better understanding of XFRM code than I
have and can probably tell if the locking behavior above is valid or
buggy. Yoshifuji-san, David or Eric may also have good ideas on that.


As a side note (I think I was not explicit enough in my previous email),
I think the possible changes to xfrm6_input_addr() and MIPv6 handlers we
are discussing are not expected to impact standard IPsec code because
there are 2 different cases in which states input handlers are called
(i.e. x->type->input()):

 - xfrm_input(): for standard IPsec case (incl. async resumption). This 
   is only for esp, ah, ipcomp and tunneling.
 - xfrm6_input_addr(): for MIPv6 extension header, i.e. RH2 and HAO in
   destopt.

and we are discussing the second.


David, as for my patches, if this is ok for you, I will keep the code of
my input handlers aligned on the code of RH2/HAO handlers and will modify
it later based on the possible corrections made on those upstream.

Don't hesitate to slap me if I made some mistakes in my analysis ;-)

Cheers,

a+
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu Oct. 3, 2010, 3:12 p.m. UTC | #5
On Sun, Oct 03, 2010 at 03:41:04PM +0200, Arnaud Ebalard wrote:
>
> After your reply, I took a (too long) look at the history of
> xfrm6_input_addr() to understand why it is as it is. If it can spare you
> some time, here is what I think happened:

...

>  - Then, as you wrote, the state lock was moved in all input handlers
>    (commit 0ebea8ef, Nov 13 2007), including RH2/HAO ones:

...

>    With that commit, I think a deadlock was introduced in MIPv6 code
>    because xfm6_input_addr() was left unchanged, i.e. x->type->input()
>    was called with the lock held. Am I correct?
> 
>  - The code of xfrm6_input_addr() was then optimized by commit a002c6fd
>    in such a way that x->type->input() was then put outside the
>    protection of the lock, which (if I am not mistaken) removed the
>    deadlock: 

...

>    I don't know if this is was intentional.

Indeed MIPv6 was completely out of action for three months and
nobody noticed :)

>    But the main question remains on the position of the lock. Here,
>    checks are done on the status of the state, lock is released,
>    reacquired in the input handler to do additional check and then
>    released again, to be reacquired later in the function to act on
>    statistics. Is my reading of the code correct?

When I moved the lock down I chose the safest option and added
it to every single input function.  So it may well be the case
that the lock isn't needed at all on the MIPv6 path.

Cheers,
Arnaud Ebalard Oct. 3, 2010, 9:25 p.m. UTC | #6
Hello,

Herbert Xu <herbert@gondor.apana.org.au> writes:

> On Sun, Oct 03, 2010 at 03:41:04PM +0200, Arnaud Ebalard wrote:
>>
>> After your reply, I took a (too long) look at the history of
>> xfrm6_input_addr() to understand why it is as it is. If it can spare you
>> some time, here is what I think happened:
>
> ...
>
>>  - Then, as you wrote, the state lock was moved in all input handlers
>>    (commit 0ebea8ef, Nov 13 2007), including RH2/HAO ones:
>
> ...
>
>>    With that commit, I think a deadlock was introduced in MIPv6 code
>>    because xfm6_input_addr() was left unchanged, i.e. x->type->input()
>>    was called with the lock held. Am I correct?
>> 
>>  - The code of xfrm6_input_addr() was then optimized by commit a002c6fd
>>    in such a way that x->type->input() was then put outside the
>>    protection of the lock, which (if I am not mistaken) removed the
>>    deadlock: 
>
> ...
>
>>    I don't know if this is was intentional.
>
> Indeed MIPv6 was completely out of action for three months and
> nobody noticed :)

hehe ;-) Just to correct a missing waypoint in my history, which is in
fact the real fix for the deadlock:

   commit 9473e1f631de339c50bde1e3bd09e1045fe90fd5
   Author: Masahide NAKAMURA <nakam@linux-ipv6.org>
   Date:   Thu Dec 20 20:41:57 2007 -0800
   
       [XFRM] MIPv6: Fix to input RO state correctly.
       
       Disable spin_lock during xfrm_type.input() function.
       Follow design as IPsec inbound does.
    
       Signed-off-by: Masahide NAKAMURA <nakam@linux-ipv6.org>
       Signed-off-by: David S. Miller <davem@davemloft.net>

>>    But the main question remains on the position of the lock. Here,
>>    checks are done on the status of the state, lock is released,
>>    reacquired in the input handler to do additional check and then
>>    released again, to be reacquired later in the function to act on
>>    statistics. Is my reading of the code correct?
>
> When I moved the lock down I chose the safest option and added
> it to every single input function.  So it may well be the case
> that the lock isn't needed at all on the MIPv6 path.

I don't have any technical argument to support the removal of the locks,
i.e. don't see what would prevent changes during the check. I will try
and spend more time on it, but meanwhile I think it's safe to keep
things the way they are.

Thanks for your time, Herbert.

Cheers,

a+
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e6a753c..05b2b1f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -35,6 +35,8 @@ 
 #define XFRM_PROTO_IPV6		41
 #define XFRM_PROTO_ROUTING	IPPROTO_ROUTING
 #define XFRM_PROTO_DSTOPTS	IPPROTO_DSTOPTS
+#define XFRM_PROTO_IRO_SRC      127
+#define XFRM_PROTO_IRO_DST      128
 
 #define XFRM_ALIGN8(len)	(((len) + 7) & ~7)
 #define MODULE_ALIAS_XFRM_MODE(family, encap) \
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index d6e9599..04b9e1d 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -477,6 +477,131 @@  static const struct xfrm_type mip6_rthdr_type =
 	.hdr_offset	= mip6_rthdr_offset,
 };
 
+#ifdef CONFIG_XFRM_SUB_POLICY
+/* IRO equivalent of mip6_destopt_input(): handles incoming packet with a
+ * source address different from the one expected in the SA: check that
+ * received source address is indeed the CoA we expected (or any address
+ * if the state references the unspecified address '::') */
+static int mip6_iro_src_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	int err = 1;
+
+	spin_lock(&x->lock);
+	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		err = -ENOENT;
+	spin_unlock(&x->lock);
+
+	return err;
+}
+
+/* IRO equivalent of mip6_destopt_output(): replaces current source address
+ * of outgoing packet by state's CoA. */
+static int mip6_iro_src_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	spin_lock_bh(&x->lock);
+	memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr));
+	spin_unlock_bh(&x->lock);
+
+	return 0;
+}
+
+static int mip6_iro_src_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl)
+{
+	int err = 0;
+
+	/* XXX We may need some reject handler at some point but it is not
+	 * critical yet: see xfrm_secpath_reject() in net/xfrm/xfrm_policy.c
+	 * and aslo what mip6_destopt_reject() implements */
+
+	printk("XXX FIXME: mip6_iro_src_reject() called\n");
+
+	return err;
+}
+
+/* This is the IRO equivalent of mip6_rthdr_input(): handles incoming packet
+ * with a destination address different from the one expected in the SA:
+ * check that received destination address is indeed the CoA we expected
+ * (or any address if the state references the unspecified address '::') */
+static int mip6_iro_dst_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	int err = 1;
+
+	spin_lock(&x->lock);
+	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
+	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
+		err = -ENOENT;
+	spin_unlock(&x->lock);
+
+	return err;
+}
+
+/* IRO equivalent of mip6_rthdr_output(): replaces current destination
+ * address of outgoing packet with state's CoA */
+static int mip6_iro_dst_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	spin_lock_bh(&x->lock);
+	memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr));
+	spin_unlock_bh(&x->lock);
+
+	return 0;
+}
+
+/* Common to iro src and dst remapping states. */
+static int mip6_iro_init_state(struct xfrm_state *x)
+{
+	if (x->id.spi) {
+		printk(KERN_INFO "%s: spi is not 0: %u\n", __func__,
+		       x->id.spi);
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) {
+		printk(KERN_INFO "%s: state's mode is not %u: %u\n",
+		       __func__, XFRM_MODE_ROUTEOPTIMIZATION,
+		       x->props.mode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Unlike common IPsec protocols, nothing to do when destroying */
+static void mip6_iro_destroy(struct xfrm_state *x)
+{
+}
+
+static const struct xfrm_type mip6_iro_src_type =
+{
+	.description	= "MIP6_IRO_SRC",
+	.owner		= THIS_MODULE,
+	.proto	     	= XFRM_PROTO_IRO_SRC,
+	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
+	.init_state	= mip6_iro_init_state,
+	.destructor	= mip6_iro_destroy,
+	.input		= mip6_iro_src_input,
+	.output		= mip6_iro_src_output,
+	.reject         = mip6_iro_src_reject,
+};
+
+static const struct xfrm_type mip6_iro_dst_type =
+{
+	.description	= "MIP6_IRO_DST",
+	.owner		= THIS_MODULE,
+	.proto	     	= XFRM_PROTO_IRO_DST,
+	.flags		= XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
+	.init_state	= mip6_iro_init_state,
+	.destructor	= mip6_iro_destroy,
+	.input		= mip6_iro_dst_input,
+	.output		= mip6_iro_dst_output,
+};
+#endif /* CONFIG_XFRM_SUB_POLICY */
+
 static int __init mip6_init(void)
 {
 	printk(KERN_INFO "Mobile IPv6\n");
@@ -489,6 +614,20 @@  static int __init mip6_init(void)
 		printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__);
 		goto mip6_rthdr_xfrm_fail;
 	}
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (xfrm_register_type(&mip6_iro_src_type, AF_INET6) < 0) {
+		printk(KERN_INFO "%s: can't add xfrm type(IRO src remap)\n",
+		       __func__);
+		goto mip6_iro_src_remap_xfrm_fail;
+	}
+	if (xfrm_register_type(&mip6_iro_dst_type, AF_INET6) < 0) {
+		printk(KERN_INFO "%s: can't add xfrm type(IRO dst remap)\n",
+		       __func__);
+		goto mip6_iro_dst_remap_xfrm_fail;
+	}
+#endif
+
 	if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
 		printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__);
 		goto mip6_rawv6_mh_fail;
@@ -498,6 +637,12 @@  static int __init mip6_init(void)
 	return 0;
 
  mip6_rawv6_mh_fail:
+#ifdef CONFIG_XFRM_SUB_POLICY
+	xfrm_unregister_type(&mip6_iro_dst_type, AF_INET6);
+ mip6_iro_dst_remap_xfrm_fail:
+	xfrm_unregister_type(&mip6_iro_src_type, AF_INET6);
+ mip6_iro_src_remap_xfrm_fail:
+#endif
 	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
  mip6_rthdr_xfrm_fail:
 	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
@@ -509,6 +654,14 @@  static void __exit mip6_fini(void)
 {
 	if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
 		printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __func__);
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (xfrm_unregister_type(&mip6_iro_dst_type, AF_INET6) < 0)
+		printk(KERN_INFO "%s: can't remove xfrm type(IRO dst remap)\n",
+		       __func__);
+	if (xfrm_unregister_type(&mip6_iro_src_type, AF_INET6) < 0)
+		printk(KERN_INFO "%s: can't remove xfrm type(IRO src remap)\n",
+		       __func__);
+#endif
 	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
 		printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__);
 	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 63d5d49..ea33178 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -45,6 +45,15 @@  static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	u8 *prevhdr;
 	int hdr_len;
 
+	/* Unlike RH2 (IPPROTO_ROUTING) and HAO in DstOpt
+	 * (IPPROTO_DSTOPTS), IRO remapping states do not
+	 * add extension header to the packet. Source
+	 * and/or destination addresses are simply modified
+	 * in place. */
+	if (x->id.proto == XFRM_PROTO_IRO_SRC ||
+	    x->id.proto == XFRM_PROTO_IRO_DST)
+		goto out;
+
 	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
@@ -54,8 +63,8 @@  static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	__skb_pull(skb, hdr_len);
 	memmove(ipv6_hdr(skb), iph, hdr_len);
 
+ out:
 	x->lastused = get_seconds();
-
 	return 0;
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8bae6b2..2aecd40 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -179,6 +179,10 @@  static int verify_newsa_info(struct xfrm_usersa_info *p,
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case IPPROTO_DSTOPTS:
 	case IPPROTO_ROUTING:
+#ifdef CONFIG_XFRM_SUB_POLICY
+	case XFRM_PROTO_IRO_SRC:
+	case XFRM_PROTO_IRO_DST:
+#endif
 		if (attrs[XFRMA_ALG_COMP]	||
 		    attrs[XFRMA_ALG_AUTH]	||
 		    attrs[XFRMA_ALG_AUTH_TRUNC]	||