diff mbox series

[net-next,2/3] rds: Enable RDS IPv6 support

Message ID 7f4f460079d3d78a18f7d759488048798e99c4db.1529922794.git.ka-cheong.poon@oracle.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series rds: IPv6 support | expand

Commit Message

Ka-Cheong Poon June 25, 2018, 10:38 a.m. UTC
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests.  RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32 bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connection. And it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to
send the connection set up request.

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
---
 net/rds/bind.c           | 21 +++++++++++++++---
 net/rds/connection.c     | 43 ++++++++++++++++++++++++-------------
 net/rds/ib.c             | 55 +++++++++++++++++++++++++++++++++++++++++-------
 net/rds/ib_cm.c          | 15 +++++++------
 net/rds/rdma_transport.c | 32 ++++++++++++++++++++++++++--
 net/rds/rdma_transport.h |  2 ++
 net/rds/rds.h            | 12 ++++++-----
 net/rds/send.c           | 23 ++++++++++++++++++--
 net/rds/tcp.c            | 54 +++++++++++++++++++++++++++++------------------
 net/rds/tcp.h            |  4 +---
 net/rds/tcp_connect.c    | 54 ++++++++++++++++++++++++++++++++++++-----------
 net/rds/tcp_listen.c     | 40 +++++++++++++++++++++++++++--------
 12 files changed, 269 insertions(+), 86 deletions(-)

Comments

kernel test robot June 25, 2018, 2:52 p.m. UTC | #1
Hi Ka-Cheong,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Ka-Cheong-Poon/rds-IPv6-support/20180625-190047
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   net/rds/tcp_listen.c:86:22: sparse: expression using sizeof(void)
>> net/rds/tcp_listen.c:288:33: sparse: incorrect type in assignment (different base types) @@    expected restricted __be16 [usertype] sin6_port @@    got unsignedrestricted __be16 [usertype] sin6_port @@
   net/rds/tcp_listen.c:288:33:    expected restricted __be16 [usertype] sin6_port
   net/rds/tcp_listen.c:288:33:    got unsigned short [unsigned] [usertype] <noident>
>> net/rds/tcp_listen.c:295:38: sparse: incorrect type in assignment (different base types) @@    expected restricted __be32 [usertype] s_addr @@    got ricted __be32 [usertype] s_addr @@
   net/rds/tcp_listen.c:295:38:    expected restricted __be32 [usertype] s_addr
   net/rds/tcp_listen.c:295:38:    got unsigned long [unsigned] <noident>
>> net/rds/tcp_listen.c:296:31: sparse: incorrect type in assignment (different base types) @@    expected restricted __be16 [usertype] sin_port @@    got unsignedrestricted __be16 [usertype] sin_port @@
   net/rds/tcp_listen.c:296:31:    expected restricted __be16 [usertype] sin_port
   net/rds/tcp_listen.c:296:31:    got unsigned short [unsigned] [usertype] <noident>

vim +288 net/rds/tcp_listen.c

   258	
   259	struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
   260	{
   261		struct socket *sock = NULL;
   262		struct sockaddr_storage ss;
   263		struct sockaddr_in6 *sin6;
   264		struct sockaddr_in *sin;
   265		int addr_len;
   266		int ret;
   267	
   268		ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
   269				       IPPROTO_TCP, &sock);
   270		if (ret < 0) {
   271			rdsdebug("could not create %s listener socket: %d\n",
   272				 isv6 ? "IPv6" : "IPv4", ret);
   273			goto out;
   274		}
   275	
   276		sock->sk->sk_reuse = SK_CAN_REUSE;
   277		rds_tcp_nonagle(sock);
   278	
   279		write_lock_bh(&sock->sk->sk_callback_lock);
   280		sock->sk->sk_user_data = sock->sk->sk_data_ready;
   281		sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
   282		write_unlock_bh(&sock->sk->sk_callback_lock);
   283	
   284		if (isv6) {
   285			sin6 = (struct sockaddr_in6 *)&ss;
   286			sin6->sin6_family = PF_INET6;
   287			sin6->sin6_addr = in6addr_any;
 > 288			sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
   289			sin6->sin6_scope_id = 0;
   290			sin6->sin6_flowinfo = 0;
   291			addr_len = sizeof(*sin6);
   292		} else {
   293			sin = (struct sockaddr_in *)&ss;
   294			sin->sin_family = PF_INET;
 > 295			sin->sin_addr.s_addr = INADDR_ANY;
 > 296			sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
   297			addr_len = sizeof(*sin);
   298		}
   299	
   300		ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
   301		if (ret < 0) {
   302			rdsdebug("could not bind %s listener socket: %d\n",
   303				 isv6 ? "IPv6" : "IPv4", ret);
   304			goto out;
   305		}
   306	
   307		ret = sock->ops->listen(sock, 64);
   308		if (ret < 0)
   309			goto out;
   310	
   311		return sock;
   312	out:
   313		if (sock)
   314			sock_release(sock);
   315		return NULL;
   316	}
   317	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Sowmini Varadhan June 25, 2018, 5:03 p.m. UTC | #2
On (06/25/18 03:38), Ka-Cheong Poon wrote:
> @@ -1105,8 +1105,27 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
>  			break;
>  
>  		case sizeof(*sin6): {
> -			ret = -EPROTONOSUPPORT;
> -			goto out;
> +			int addr_type;
                         :
                         :
> +			daddr = sin6->sin6_addr;
> +			dport = sin6->sin6_port;
> +			scope_id = sin6->sin6_scope_id;
> +			break;
>  		}

In rds_sendmsg, the scopeid passed to rds_conn_create_outgoing
may come from the msg_name (if msg_name is a link-local) or
may come from the rs_bound_scope_id (for connected socket, change
made in Patch 1 of the series). 

This sounds inconsistent.

If I bind to scopeid if1 and then send to fe80::1%if2 (without connect()), 
we'd create an rds_connection with dev_if set to if2. 
(first off, its a bit unexpected to be sending to fe80::1%if2 when you
are bound to a link-local on if1!)

But then, if we got back a response from fe80::1%if2, I think we would
not find a matching conn in rds_recv_incoming? 

And this is even more confusing because the fastpath in rds_sendmsg
does not take the bound_scope_id into consideration at all:
1213         if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
1214                 conn = rs->rs_conn;
1215         else {
1216                 conn = rds_conn_create_outgoing( /* .. */, scope_id)
so if I erroneously passed a msg_name on a connected rds socket, what
would happen? (see also question about rds_connect() itself, below)

Should we always use rs_bound_scope_id for creating the outgoing
rds_connection? (you may need something deterministic for this, 
like "if bound addr is linklocal, return error if daddr has a different
scopeid, else use the bound addr's scopeid", plus, "if bound addr is
not global, and daddr is link-local, we need a conn with the daddr's
scopeid")

Also, why is there no IPv6 support in rds_connect? 

(still looking through the rds-tcp changes, but wanted to get these
questions clarified first).

--Sowmini
Ka-Cheong Poon June 25, 2018, 5:43 p.m. UTC | #3
On 06/26/2018 01:03 AM, Sowmini Varadhan wrote:
> On (06/25/18 03:38), Ka-Cheong Poon wrote:
>> @@ -1105,8 +1105,27 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
>>   			break;
>>   
>>   		case sizeof(*sin6): {
>> -			ret = -EPROTONOSUPPORT;
>> -			goto out;
>> +			int addr_type;
>                           :
>                           :
>> +			daddr = sin6->sin6_addr;
>> +			dport = sin6->sin6_port;
>> +			scope_id = sin6->sin6_scope_id;
>> +			break;
>>   		}
> 
> In rds_sendmsg, the scopeid passed to rds_conn_create_outgoing
> may come from the msg_name (if msg_name is a link-local) or
> may come from the rs_bound_scope_id (for connected socket, change
> made in Patch 1 of the series).
> 
> This sounds inconsistent.
> 
> If I bind to scopeid if1 and then send to fe80::1%if2 (without connect()),
> we'd create an rds_connection with dev_if set to if2.
> (first off, it's a bit unexpected to be sending to fe80::1%if2 when you
> are bound to a link-local on if1!)
> 
> But then, if we got back a response from fe80::1%if2, I think we would
> not find a matching conn in rds_recv_incoming?


Yes, I think if the socket is bound, it should check the scope_id
in msg_name (if not NULL) to make sure that they match.  A bound
RDS socket can send to multiple peers.  But if the bound local
address is link local, it should only be allowed to send to peers
on the same link.


> And this is even more confusing because the fastpath in rds_sendmsg
> does not take the bound_scope_id into consideration at all:
> 1213         if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
> 1214                 conn = rs->rs_conn;
> 1215         else {
> 1216                 conn = rds_conn_create_outgoing( /* .. */, scope_id)
> so if I erroneously passed a msg_name on a connected rds socket, what
> would happen? (see also question about rds_connect() itself, below)


The check added above takes care of this.  The scope_id should
match.


> Should we always use rs_bound_scope_id for creating the outgoing
> rds_connection? (you may need something deterministic for this,
> like "if bound addr is linklocal, return error if daddr has a different
> scopeid, else use the bound addr's scopeid", plus, "if bound addr is
> not global, and daddr is link-local, we need a conn with the daddr's
> scopeid")


If a socket is bound, I guess the scope_id should be used.  So
if a socket is not bound to a link local address and the socket
is used to send to a link local peer, it should fail.


> Also, why is there no IPv6 support in rds_connect?


Oops, I missed this when I ported the internal version to the
net-next version.  Will add it back.
Sowmini Varadhan June 25, 2018, 5:50 p.m. UTC | #4
On (06/26/18 01:43), Ka-Cheong Poon wrote:
> 
> Yes, I think if the socket is bound, it should check the scope_id
> in msg_name (if not NULL) to make sure that they match.  A bound
> RDS socket can send to multiple peers.  But if the bound local
> address is link local, it should only be allowed to send to peers
> on the same link.

agree.


> If a socket is bound, I guess the scope_id should be used.  So
> if a socket is not bound to a link local address and the socket
> is used to send to a link local peer, it should fail.

PF_RDS sockets *MUST* always be bound.  See
Documentation/networking/rds.txt:
"   Sockets must be bound before you can send or receive data.
    This is needed because binding also selects a transport and
    attaches it to the socket. Once bound, the transport assignment
    does not change."

Also, rds_sendmsg checks this (from net-next, your version
has the equivalent ipv6_addr_any etc check):

        if (daddr == 0 || rs->rs_bound_addr == 0) {
                release_sock(sk);
                ret = -ENOTCONN; /* XXX not a great errno */
                goto out;
        }

> 
> >Also, why is there no IPv6 support in rds_connect?
> 
> 
> Oops, I missed this when I ported the internal version to the
> net-next version.  Will add it back.

Ok

--Sowmini
Santosh Shilimkar June 25, 2018, 6:44 p.m. UTC | #5
On 6/25/2018 10:50 AM, Sowmini Varadhan wrote:
> On (06/26/18 01:43), Ka-Cheong Poon wrote:
>>
>> Yes, I think if the socket is bound, it should check the scope_id
>> in msg_name (if not NULL) to make sure that they match.  A bound
>> RDS socket can send to multiple peers.  But if the bound local
>> address is link local, it should only be allowed to send to peers
>> on the same link.
> 
> agree.
Yep. It's in line with RDS bind behavior.

> 
> 
>> If a socket is bound, I guess the scope_id should be used.  So
>> if a socket is not bound to a link local address and the socket
>> is used to send to a link local peer, it should fail.
> 
> PF_RDS sockets *MUST* always be bound.  See
> Documentation/networking/rds.txt:
> "   Sockets must be bound before you can send or receive data.
>      This is needed because binding also selects a transport and
>      attaches it to the socket. Once bound, the transport assignment
>      does not change."
> 
In any case link local or not, the socket needs to be bound before
any data can be sent as documented. Send path already enforces
it.

>>> Also, why is there no IPv6 support in rds_connect?
>>
>>
>> Oops, I missed this when I ported the internal version to the
>> net-next version.  Will add it back.
> 
So the net-next version wasn't tested? IPv6 connections
themselves wouldn't be formed with this missing. As mentioned
already, please test v2 before posting on the list.

Regards,
Santosh
Ka-Cheong Poon June 26, 2018, 5:30 a.m. UTC | #6
On 06/26/2018 01:50 AM, Sowmini Varadhan wrote:

>> If a socket is bound, I guess the scope_id should be used.  So
>> if a socket is not bound to a link local address and the socket
>> is used to send to a link local peer, it should fail.
> 
> PF_RDS sockets *MUST* always be bound.  See
> Documentation/networking/rds.txt:
> "   Sockets must be bound before you can send or receive data.
>      This is needed because binding also selects a transport and
>      attaches it to the socket. Once bound, the transport assignment
>      does not change."
> 
> Also, rds_sendmsg checks this (from net-next, your version
> has the equivalent ipv6_addr_any etc check):
> 
>          if (daddr == 0 || rs->rs_bound_addr == 0) {
>                  release_sock(sk);
>                  ret = -ENOTCONN; /* XXX not a great errno */
>                  goto out;
>          }


I think you misunderstood what I wrote.  The above is in response
to your original question:

--
 > And this is even more confusing because the fastpath in rds_sendmsg
 > does not take the bound_scope_id into consideration at all:
 > 1213         if (rs->rs_conn && 
ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
 > 1214                 conn = rs->rs_conn;
 > 1215         else {
 > 1216                 conn = rds_conn_create_outgoing( /* .. */, scope_id)
 > so if I erroneously passed a msg_name on a connected rds socket, what
 > would happen? (see also question about rds_connect() itself, below)
--


My answer to this is that if a socket is not bound to a link
local address (meaning it is bound to a non-link local address)
and it is used to send to a link local peer, I think it should
fail.  This is consistent with the scope_id check I mentioned in
the previous mail.  If the socket is not bound to a link local
address, the bound_scope_id is 0.  So if the socket is used to
send to a link local address (which has a non-zero scope_id), the
check will catch it and fail the call.  A new conn should not
be created in this case.
Sowmini Varadhan June 26, 2018, 10:16 a.m. UTC | #7
On (06/26/18 13:30), Ka-Cheong Poon wrote:
> 
> My answer to this is that if a socket is not bound to a link
> local address (meaning it is bound to a non-link local address)
> and it is used to send to a link local peer, I think it should
> fail.

Hmm, I'm not sure I agree. I don't think this is forbidden
by RFC 6724 - yes, such a packet cannot be forwarded, but
if everything is on the same link, and the dest only has
a link-local, you should not need to (create and) bind
another socket to a link-local to talk to this destination..

>  This is consistent with the scope_id check I mentioned in
> the previous mail.  If the socket is not bound to a link local
> address, the bound_scope_id is 0.  So if the socket is used to
> send to a link local address (which has a non-zero scope_id), the
> check will catch it and fail the call.  A new conn should not
> be created in this case.
Ka-Cheong Poon June 26, 2018, 1:02 p.m. UTC | #8
On 06/26/2018 06:16 PM, Sowmini Varadhan wrote:
> On (06/26/18 13:30), Ka-Cheong Poon wrote:
>>
>> My answer to this is that if a socket is not bound to a link
>> local address (meaning it is bound to a non-link local address)
>> and it is used to send to a link local peer, I think it should
>> fail.
> 
> Hmm, I'm not sure I agree. I dont think this is forbidden
> by RFC 6724 - yes, such a packet cannot be forwarded, but
> if everything is on  the same link, and the dest only has
> a link-local, you should not need to (create and) bind
> another socket to a link-local to talk to this destination..


In this case, RFC 6724 prefers link local address as source.
While using non-link local address (say ULA) is not forbidden,
doing this can easily cause inter-operability issues (does the
app really know that the non-link local source and the link
local destination addresses are really on the same link?).  I
think it is prudent to disallow this in RDS unless there is a
very clear and important reason to do so.  BTW, if it is really
needed, it can be added in future.


>>   This is consistent with the scope_id check I mentioned in
>> the previous mail.  If the socket is not bound to a link local
>> address, the bound_scope_id is 0.  So if the socket is used to
>> send to a link local address (which has a non-zero scope_id), the
>> check will catch it and fail the call.  A new conn should not
>> be created in this case.
>
Sowmini Varadhan June 26, 2018, 1:08 p.m. UTC | #9
On (06/26/18 21:02), Ka-Cheong Poon wrote:
> 
> In this case, RFC 6724 prefers link local address as source.

the keyword is "prefers". 

> While using non-link local address (say ULA) is not forbidden,
> doing this can easily cause inter-operability issues (does the
> app really know that the non-link local source and the link
> local destination addresses are really on the same link?).  I
> think it is prudent to disallow this in RDS unless there is a
> very clear and important reason to do so. 

I remember the issues that triggered 6724. The "interop" issue
is that when you send from Link-local to global, and need forwarding,
it may not work.

but I don't think an RDS application today expects to deal with
the case that "oh I got back an error when I tried to send to
address X on rds socket rs1, let me go and check what I am bound
to, and maybe create another socket, and bind it to link-local"

You're not doing this for IPv4 and RDS today (you dont have to do this
for UDP, afaik)

This is especially true if "X" is a hostname that got resolved using DNS

> BTW, if it is really needed, it can be added in future.

shrug. You are introducing a new error return.

--Sowmini
Ka-Cheong Poon June 27, 2018, 10:07 a.m. UTC | #10
On 06/26/2018 09:08 PM, Sowmini Varadhan wrote:
> On (06/26/18 21:02), Ka-Cheong Poon wrote:
>>
>> In this case, RFC 6724 prefers link local address as source.
> 
> the keyword is "prefers".


There is a reason for that.  It is the way folks expect
how IPv6 addresses are being used.


>> While using non-link local address (say ULA) is not forbidden,
>> doing this can easily cause inter-operability issues (does the
>> app really know that the non-link local source and the link
>> local destination addresses are really on the same link?).  I
>> think it is prudent to disallow this in RDS unless there is a
>> very clear and important reason to do so.
> 
> I remember the issues that triggered 6724. The "interop" issue
> is that when you send from Link-local to global, and need forwarding,
> it may not work.


It is not just forwarding.  The simple case is that one
picks a global address in a different link and then
use it to send to a link local address in another link.
This does not work.  And the RDS connection created will
be stuck forever.  I don't think this is a good idea to
have such stuck connections.


> but I don't think an RDS application today expects to deal with
> the case that "oh I got back an error when I tried to send to
> address X on rds socket rs1, let me go and check what I am bound
> to, and maybe create another socket, and bind it to link-local"


I don't expect RDS apps will want to use link local address
in the first place.  In fact, most normal network apps don't.


> You're not doing this for IPv4 and RDS today (you dont have to do this
> for UDP, afaik)


Do you know of any IPv4 RDS app which uses IPv4 link local
address?  In fact, IPv4 link local address is explicitly
disallowed for active active bonding.


> This is especially true if "X" is a hostname that got resolved using DNS


Can you explain why DNS name resolution will return an IPv6
link local address?  I'm surprised if it actually does.


>> BTW, if it is really needed, it can be added in future.
> 
> shrug. You are introducing a new error return.


An error needs to be returned because it is not allowed.
Sowmini Varadhan June 27, 2018, 10:29 a.m. UTC | #11
On (06/27/18 18:07), Ka-Cheong Poon wrote:
> 
> There is a reason for that.  It is the way folks expect
> how IPv6 addresses are being used.

have you tried "traceroute6 -s abc::2 fe80::2" on linux?

> It is not just forwarding.  The simple case is that one
> picks a global address in a different link and then
> use it to send to a link local address in another link.

This is actually not any different than ipv4's strong/weak ES model.

Global addresses are supposed to be globally routable. For your
above example, if you do that, it is assumed that your routing
table has been set up suitably.

To state what may be well-known:
This does not work for link-locals, because, as the name
suggests, those are local to the link and you may have the same
link-local on multiple links

> This does not work.  And the RDS connection created will
> be stuck forever.  

that is a different problem in the RDS implementation (that
it does not backoff and timeout a failing reconnect)

As you can see from the traceroute6 example, global <-> link-local 
is supported for udp (and probably also tcp sockets, I have not checked
that case)

> I don't expect RDS apps will want to use link local address
> in the first place.  In fact, most normal network apps don't.
   :
> Do you know of any IPv4 RDS app which uses IPv4 link local
> address?  In fact, IPv4 link local address is explicitly
> disallowed for active active bonding.

Are we talking about "why this is ok for my particular use
of link-local, so I can slide my patch forward" or, 
"why this is correct IPv6 behavior"?

> Can you explain why DNS name resolution will return an IPv6
> link local address?  I'm surprised if it actually does.

It depends on how you set up your DNS.

It seems like this is all about "I dont want to deal with this
now", so I dont want to continue this discussion which is really
going nowhere.

Thanks

--Sowmini
Santosh Shilimkar June 27, 2018, 8:45 p.m. UTC | #12
On 6/27/2018 3:07 AM, Ka-Cheong Poon wrote:
> On 06/26/2018 09:08 PM, Sowmini Varadhan wrote:
>> On (06/26/18 21:02), Ka-Cheong Poon wrote:

[...]

> 
> 
> I don't expect RDS apps will want to use link local address
> in the first place.  In fact, most normal network apps don't.
>
This is not true.

> 
>> You're not doing this for IPv4 and RDS today (you dont have to do this
>> for UDP, afaik)
> 
> 
> Do you know of any IPv4 RDS app which uses IPv4 link local
> address?  In fact, IPv4 link local address is explicitly
> disallowed for active active bonding.
>
Yes. Cluster-ware HAIP makes use of link local addresses. That
check was mainly because of RDMA CM issues but that only means
active-active isn't used. The bonding works just fine and if
needed cluster-ware can also use TCP transport.

Let's not add this new behavior for link local, as it's
actually not really relevant to v6 addressing support.

Regards,
Santosh
diff mbox series

Patch

diff --git a/net/rds/bind.c b/net/rds/bind.c
index 3a1097e..4c2bf9c 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -164,11 +164,12 @@  int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct in6_addr v6addr, *binding_addr;
 	struct rds_transport *trans;
 	__u32 scope_id = 0;
+	int addr_type;
 	int ret = 0;
 	__be16 port;
 
-	/* We only allow an RDS socket to be bound to and IPv4 address. IPv6
-	 * address support will be added later.
+	/* We allow an RDS socket to be bound to either IPv4 or IPv6
+	 * address.
 	 */
 	if (addr_len == sizeof(struct sockaddr_in)) {
 		struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
@@ -180,7 +181,21 @@  int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		binding_addr = &v6addr;
 		port = sin->sin_port;
 	} else if (addr_len == sizeof(struct sockaddr_in6)) {
-		return -EPROTONOSUPPORT;
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
+
+		addr_type = ipv6_addr_type(&sin6->sin6_addr);
+		if (sin6->sin6_family != AF_INET6 ||
+		    !(addr_type & IPV6_ADDR_UNICAST)) {
+			return -EINVAL;
+		}
+		/* The scope ID must be specified for link local address. */
+		if (addr_type & IPV6_ADDR_LINKLOCAL) {
+			if (sin6->sin6_scope_id == 0)
+				return -EINVAL;
+			scope_id = sin6->sin6_scope_id;
+		}
+		binding_addr = &sin6->sin6_addr;
+		port = sin6->sin6_port;
 	} else {
 		return -EINVAL;
 	}
diff --git a/net/rds/connection.c b/net/rds/connection.c
index ca72563..8c5d093 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -486,10 +486,17 @@  void rds_conn_destroy(struct rds_connection *conn)
 }
 EXPORT_SYMBOL_GPL(rds_conn_destroy);
 
-static void rds_conn_message_info(struct socket *sock, unsigned int len,
-				  struct rds_info_iterator *iter,
-				  struct rds_info_lengths *lens,
-				  int want_send)
+static void __rds_inc_msg_cp(struct rds_incoming *inc,
+			     struct rds_info_iterator *iter,
+			     void *saddr, void *daddr, int flip)
+{
+	rds_inc_info_copy(inc, iter, *(__be32 *)saddr, *(__be32 *)daddr, flip);
+}
+
+static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
+				      struct rds_info_iterator *iter,
+				      struct rds_info_lengths *lens,
+				      int want_send)
 {
 	struct hlist_head *head;
 	struct list_head *list;
@@ -524,18 +531,13 @@  static void rds_conn_message_info(struct socket *sock, unsigned int len,
 
 				/* XXX too lazy to maintain counts.. */
 				list_for_each_entry(rm, list, m_conn_item) {
-					__be32 laddr;
-					__be32 faddr;
-
 					total++;
-					laddr = conn->c_laddr.s6_addr32[3];
-					faddr = conn->c_faddr.s6_addr32[3];
 					if (total <= len)
-						rds_inc_info_copy(&rm->m_inc,
-								  iter,
-								  laddr,
-								  faddr,
-								  0);
+						__rds_inc_msg_cp(&rm->m_inc,
+								 iter,
+								 &conn->c_laddr,
+								 &conn->c_faddr,
+								 0);
 				}
 
 				spin_unlock_irqrestore(&cp->cp_lock, flags);
@@ -548,6 +550,14 @@  static void rds_conn_message_info(struct socket *sock, unsigned int len,
 	lens->each = sizeof(struct rds_info_message);
 }
 
+static void rds_conn_message_info(struct socket *sock, unsigned int len,
+				  struct rds_info_iterator *iter,
+				  struct rds_info_lengths *lens,
+				  int want_send)
+{
+	rds_conn_message_info_cmn(sock, len, iter, lens, want_send);
+}
+
 static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
 				       struct rds_info_iterator *iter,
 				       struct rds_info_lengths *lens)
@@ -655,6 +665,9 @@  static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
 	struct rds_info_connection *cinfo = buffer;
 	struct rds_connection *conn = cp->cp_conn;
 
+	if (conn->c_isv6)
+		return 0;
+
 	cinfo->next_tx_seq = cp->cp_next_tx_seq;
 	cinfo->next_rx_seq = cp->cp_next_rx_seq;
 	cinfo->laddr = conn->c_laddr.s6_addr32[3];
diff --git a/net/rds/ib.c b/net/rds/ib.c
index c712a84..756225c 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -39,6 +39,7 @@ 
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <net/addrconf.h>
 
 #include "rds_single_path.h"
 #include "rds.h"
@@ -295,6 +296,8 @@  static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
 		return 0;
+	if (conn->c_isv6)
+		return 0;
 
 	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
@@ -330,7 +333,6 @@  static void rds_ib_ic_info(struct socket *sock, unsigned int len,
 				sizeof(struct rds_info_rdma_connection));
 }
 
-
 /*
  * Early RDS/IB was built to only bind to an address if there is an IPoIB
  * device with that address set.
@@ -346,8 +348,12 @@  static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
 {
 	int ret;
 	struct rdma_cm_id *cm_id;
+	struct sockaddr_in6 sin6;
 	struct sockaddr_in sin;
+	struct sockaddr *sa;
+	bool isv4;
 
+	isv4 = ipv6_addr_v4mapped(addr);
 	/* Create a CMA ID and try to bind it. This catches both
 	 * IB and iWARP capable NICs.
 	 */
@@ -356,20 +362,53 @@  static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 
-	memset(&sin, 0, sizeof(sin));
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = addr->s6_addr32[3];
+	if (isv4) {
+		memset(&sin, 0, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_addr.s_addr = addr->s6_addr32[3];
+		sa = (struct sockaddr *)&sin;
+	} else {
+		memset(&sin6, 0, sizeof(sin6));
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_addr = *addr;
+		sin6.sin6_scope_id = scope_id;
+		sa = (struct sockaddr *)&sin6;
+
+		/* XXX Do a special IPv6 link local address check here.  The
+		 * reason is that rdma_bind_addr() always succeeds with IPv6
+		 * link local address regardless it is indeed configured in a
+		 * system.
+		 */
+		if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
+			struct net_device *dev;
+
+			if (scope_id == 0)
+				return -EADDRNOTAVAIL;
+
+			/* Use init_net for now as RDS is not network
+			 * name space aware.
+			 */
+			dev = dev_get_by_index(&init_net, scope_id);
+			if (!dev)
+				return -EADDRNOTAVAIL;
+			if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
+				dev_put(dev);
+				return -EADDRNOTAVAIL;
+			}
+			dev_put(dev);
+		}
+	}
 
 	/* rdma_bind_addr will only succeed for IB & iWARP devices */
-	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+	ret = rdma_bind_addr(cm_id, sa);
 	/* due to this, we will claim to support iWARP devices unless we
 	   check node_type. */
 	if (ret || !cm_id->device ||
 	    cm_id->device->node_type != RDMA_NODE_IB_CA)
 		ret = -EADDRNOTAVAIL;
 
-	rdsdebug("addr %pI6c ret %d node type %d\n",
-		 addr, ret,
+	rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
+		 addr, scope_id, ret,
 		 cm_id->device ? cm_id->device->node_type : -1);
 
 	rdma_destroy_id(cm_id);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 31ffa70..03279f3 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -40,7 +40,6 @@ 
 #include "rds_single_path.h"
 #include "rds.h"
 #include "ib.h"
-#include "tcp.h"
 
 /*
  * Set the selected protocol version
@@ -679,7 +678,7 @@  static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
 	return version;
 }
 
-/* Given an IPv6 address, find the IB net_device which hosts that address and
+/* Given an IPv6 address, find the net_device which hosts that address and
  * return its index.  This is used by the rds_ib_cm_handle_connect() code to
  * find the interface index of where an incoming request comes from when
  * the request is using a link local address.
@@ -696,8 +695,7 @@  static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
 
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
-		if (dev->type == ARPHRD_INFINIBAND &&
-		    ipv6_chk_addr(net, addr, dev, 0)) {
+		if (ipv6_chk_addr(net, addr, dev, 0)) {
 			idx = dev->ifindex;
 			break;
 		}
@@ -887,7 +885,10 @@  int rds_ib_conn_path_connect(struct rds_conn_path *cp)
 
 	/* XXX I wonder what affect the port space has */
 	/* delegate cm event handler to rdma_transport */
-	handler = rds_rdma_cm_event_handler;
+	if (conn->c_isv6)
+		handler = rds6_rdma_cm_event_handler;
+	else
+		handler = rds_rdma_cm_event_handler;
 	ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
 				     RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(ic->i_cm_id)) {
@@ -923,7 +924,7 @@  int rds_ib_conn_path_connect(struct rds_conn_path *cp)
 		sin6 = (struct sockaddr_in6 *)&dest;
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_addr = conn->c_faddr;
-		sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+		sin6->sin6_port = (__force u16)htons(RDS_CM_PORT);
 		sin6->sin6_scope_id = conn->c_dev_if;
 	}
 
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index d7da115..6a696b8 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -37,7 +37,9 @@ 
 #include "rdma_transport.h"
 #include "ib.h"
 
+/* Global IPv4 and IPv6 RDS RDMA listener cm_id */
 static struct rdma_cm_id *rds_rdma_listen_id;
+static struct rdma_cm_id *rds6_rdma_listen_id;
 
 int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 				  struct rdma_cm_event *event,
@@ -153,6 +155,12 @@  int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
 }
 
+int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+			       struct rdma_cm_event *event)
+{
+	return rds_rdma_cm_event_handler_cmn(cm_id, event, true);
+}
+
 static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
 				       struct sockaddr *sa,
 				       struct rdma_cm_id **ret_cm_id)
@@ -199,13 +207,14 @@  static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
 
 /* Initialize the RDS RDMA listeners.  We create two listeners for
  * compatibility reason.  The one on RDS_PORT is used for IPv4
- * requests only.  The one on RDS_TCP_PORT is used for IPv6 requests
+ * requests only.  The one on RDS_CM_PORT is used for IPv6 requests
  * only.  So only IPv6 enabled RDS module will communicate using this
  * port.
  */
 static int rds_rdma_listen_init(void)
 {
 	int ret;
+	struct sockaddr_in6 sin6;
 	struct sockaddr_in sin;
 
 	sin.sin_family = PF_INET;
@@ -214,7 +223,21 @@  static int rds_rdma_listen_init(void)
 	ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
 					  (struct sockaddr *)&sin,
 					  &rds_rdma_listen_id);
-	return ret;
+	if (ret != 0)
+		return ret;
+
+	sin6.sin6_family = PF_INET6;
+	sin6.sin6_addr = in6addr_any;
+	sin6.sin6_port = htons(RDS_CM_PORT);
+	sin6.sin6_scope_id = 0;
+	sin6.sin6_flowinfo = 0;
+	ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler,
+					  (struct sockaddr *)&sin6,
+					  &rds6_rdma_listen_id);
+	/* Keep going even when IPv6 is not enabled in the system. */
+	if (ret != 0)
+		rdsdebug("Cannot set up IPv6 RDMA listener\n");
+	return 0;
 }
 
 static void rds_rdma_listen_stop(void)
@@ -224,6 +247,11 @@  static void rds_rdma_listen_stop(void)
 		rdma_destroy_id(rds_rdma_listen_id);
 		rds_rdma_listen_id = NULL;
 	}
+	if (rds6_rdma_listen_id) {
+		rdsdebug("cm %p\n", rds6_rdma_listen_id);
+		rdma_destroy_id(rds6_rdma_listen_id);
+		rds6_rdma_listen_id = NULL;
+	}
 }
 
 static int rds_rdma_init(void)
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index d309c44..bc3c639 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -11,6 +11,8 @@ 
 int rds_rdma_conn_connect(struct rds_connection *conn);
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event);
+int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+			       struct rdma_cm_event *event);
 
 /* from ib.c */
 extern struct rds_transport rds_ib_transport;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 859808a..f5f99d1 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -24,13 +24,15 @@ 
 #define RDS_PROTOCOL_MINOR(v)	((v) & 255)
 #define RDS_PROTOCOL(maj, min)	(((maj) << 8) | min)
 
-/*
- * XXX randomly chosen, but at least seems to be unused:
- * #               18464-18768 Unassigned
- * We should do better.  We want a reserved port to discourage unpriv'ed
- * userspace from listening.
+/* The following ports, 16385, 18634, 18635, are registered with IANA as
+ * the ports to be used for RDS over TCP and UDP.  18634 is the historical
+ * value used for the RDMA_CM listener port.  RDS/TCP uses port 16385.  With
+ * IPv6 support, RDMA_CM also listens on 16385, for IPv6 connections only;
+ * 18634 is kept for IPv4 and to ensure compatibility with older RDS modules.
  */
 #define RDS_PORT	18634
+#define RDS_CM_PORT	16385
+#define RDS_TCP_PORT	RDS_CM_PORT
 
 #ifdef ATOMIC64_INIT
 #define KERNEL_HAS_ATOMIC64
diff --git a/net/rds/send.c b/net/rds/send.c
index cc91860..3bc806b 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1105,8 +1105,27 @@  int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 			break;
 
 		case sizeof(*sin6): {
-			ret = -EPROTONOSUPPORT;
-			goto out;
+			int addr_type;
+
+			if (sin6->sin6_family != AF_INET6) {
+				ret = -EINVAL;
+				goto out;
+			}
+			addr_type = ipv6_addr_type(&sin6->sin6_addr);
+			if (!(addr_type & IPV6_ADDR_UNICAST)) {
+				ret = -EINVAL;
+				goto out;
+			}
+			if (addr_type & IPV6_ADDR_LINKLOCAL &&
+			    sin6->sin6_scope_id == 0) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			daddr = sin6->sin6_addr;
+			dport = sin6->sin6_port;
+			scope_id = sin6->sin6_scope_id;
+			break;
 		}
 
 		default:
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index beaff17..fb0dac1 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,7 +46,12 @@ 
 /* only for info exporting */
 static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
 static LIST_HEAD(rds_tcp_tc_list);
+
+/* rds_tcp_tc_count counts only IPv4 connections.
+ * rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
+ */
 static unsigned int rds_tcp_tc_count;
+static unsigned int rds6_tcp_tc_count;
 
 /* Track rds_tcp_connection structs so they can be cleaned up */
 static DEFINE_SPINLOCK(rds_tcp_conn_lock);
@@ -113,7 +118,9 @@  void rds_tcp_restore_callbacks(struct socket *sock,
 	/* done under the callback_lock to serialize with write_space */
 	spin_lock(&rds_tcp_tc_list_lock);
 	list_del_init(&tc->t_list_item);
-	rds_tcp_tc_count--;
+	rds6_tcp_tc_count--;
+	if (!tc->t_cpath->cp_conn->c_isv6)
+		rds_tcp_tc_count--;
 	spin_unlock(&rds_tcp_tc_list_lock);
 
 	tc->t_sock = NULL;
@@ -200,7 +207,9 @@  void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
 	/* done under the callback_lock to serialize with write_space */
 	spin_lock(&rds_tcp_tc_list_lock);
 	list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
-	rds_tcp_tc_count++;
+	rds6_tcp_tc_count++;
+	if (!tc->t_cpath->cp_conn->c_isv6)
+		rds_tcp_tc_count++;
 	spin_unlock(&rds_tcp_tc_list_lock);
 
 	/* accepted sockets need our listen data ready undone */
@@ -221,6 +230,9 @@  void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 }
 
+/* Handle RDS_INFO_TCP_SOCKETS socket option.  It only returns IPv4
+ * connections for backward compatibility.
+ */
 static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 			    struct rds_info_iterator *iter,
 			    struct rds_info_lengths *lens)
@@ -228,8 +240,6 @@  static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 	struct rds_info_tcp_socket tsinfo;
 	struct rds_tcp_connection *tc;
 	unsigned long flags;
-	struct sockaddr_in sin;
-	struct socket *sock;
 
 	spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 
@@ -237,16 +247,15 @@  static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 		goto out;
 
 	list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+		struct inet_sock *inet = inet_sk(tc->t_sock->sk);
 
-		sock = tc->t_sock;
-		if (sock) {
-			sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
-			tsinfo.local_addr = sin.sin_addr.s_addr;
-			tsinfo.local_port = sin.sin_port;
-			sock->ops->getname(sock, (struct sockaddr *)&sin, 1);
-			tsinfo.peer_addr = sin.sin_addr.s_addr;
-			tsinfo.peer_port = sin.sin_port;
-		}
+		if (tc->t_cpath->cp_conn->c_isv6)
+			continue;
+
+		tsinfo.local_addr = inet->inet_saddr;
+		tsinfo.local_port = inet->inet_sport;
+		tsinfo.peer_addr = inet->inet_daddr;
+		tsinfo.peer_port = inet->inet_dport;
 
 		tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
 		tsinfo.data_rem = tc->t_tinc_data_rem;
@@ -495,13 +504,18 @@  static __net_init int rds_tcp_init_net(struct net *net)
 		err = -ENOMEM;
 		goto fail;
 	}
-	rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
+	rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true);
 	if (!rtn->rds_tcp_listen_sock) {
-		pr_warn("could not set up listen sock\n");
-		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
-		rtn->rds_tcp_sysctl = NULL;
-		err = -EAFNOSUPPORT;
-		goto fail;
+		pr_warn("could not set up IPv6 listen sock\n");
+
+		/* Try IPv4 as some systems disable IPv6 */
+		rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false);
+		if (!rtn->rds_tcp_listen_sock) {
+			unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+			rtn->rds_tcp_sysctl = NULL;
+			err = -EAFNOSUPPORT;
+			goto fail;
+		}
 	}
 	INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
 	return 0;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index c6fa080..6a948c1 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -2,8 +2,6 @@ 
 #ifndef _RDS_TCP_H
 #define _RDS_TCP_H
 
-#define RDS_TCP_PORT	16385
-
 struct rds_tcp_incoming {
 	struct rds_incoming	ti_inc;
 	struct sk_buff_head	ti_skb_list;
@@ -67,7 +65,7 @@  void rds_tcp_restore_callbacks(struct socket *sock,
 void rds_tcp_state_change(struct sock *sk);
 
 /* tcp_listen.c */
-struct socket *rds_tcp_listen_init(struct net *);
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
 void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 0101033..039bd04 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -89,9 +89,11 @@  void rds_tcp_state_change(struct sock *sk)
 int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 {
 	struct socket *sock = NULL;
+	struct sockaddr_in6 sin6;
 	struct sockaddr_in sin;
 	struct sockaddr *addr;
 	int addrlen;
+	bool isv6;
 	int ret;
 	struct rds_connection *conn = cp->cp_conn;
 	struct rds_tcp_connection *tc = cp->cp_transport_data;
@@ -108,18 +110,36 @@  int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 		mutex_unlock(&tc->t_conn_path_lock);
 		return 0;
 	}
-	ret = sock_create_kern(rds_conn_net(conn), PF_INET,
-			       SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (ipv6_addr_v4mapped(&conn->c_laddr)) {
+		ret = sock_create_kern(rds_conn_net(conn), PF_INET,
+				       SOCK_STREAM, IPPROTO_TCP, &sock);
+		isv6 = false;
+	} else {
+		ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
+				       SOCK_STREAM, IPPROTO_TCP, &sock);
+		isv6 = true;
+	}
+
 	if (ret < 0)
 		goto out;
 
 	rds_tcp_tune(sock);
 
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = (__force u32)conn->c_laddr.s6_addr32[3];
-	sin.sin_port = (__force u16)htons(0);
-	addr = (struct sockaddr *)&sin;
-	addrlen = sizeof(sin);
+	if (isv6) {
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_addr = conn->c_laddr;
+		sin6.sin6_port = 0;
+		sin6.sin6_flowinfo = 0;
+		sin6.sin6_scope_id = conn->c_dev_if;
+		addr = (struct sockaddr *)&sin6;
+		addrlen = sizeof(sin6);
+	} else {
+		sin.sin_family = AF_INET;
+		sin.sin_addr.s_addr = (__force u32)conn->c_laddr.s6_addr32[3];
+		sin.sin_port = (__force u16)htons(0);
+		addr = (struct sockaddr *)&sin;
+		addrlen = sizeof(sin);
+	}
 
 	ret = sock->ops->bind(sock, addr, addrlen);
 	if (ret) {
@@ -128,11 +148,21 @@  int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
 		goto out;
 	}
 
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = (__force u32)conn->c_faddr.s6_addr32[3];
-	sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
-	addr = (struct sockaddr *)&sin;
-	addrlen = sizeof(sin);
+	if (isv6) {
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_addr = conn->c_faddr;
+		sin6.sin6_port = htons(RDS_TCP_PORT);
+		sin6.sin6_flowinfo = 0;
+		sin6.sin6_scope_id = conn->c_dev_if;
+		addr = (struct sockaddr *)&sin6;
+		addrlen = sizeof(sin6);
+	} else {
+		sin.sin_family = AF_INET;
+		sin.sin_addr.s_addr = (__force u32)conn->c_faddr.s6_addr32[3];
+		sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+		addr = (struct sockaddr *)&sin;
+		addrlen = sizeof(sin);
+	}
 
 	/*
 	 * once we call connect() we can start getting callbacks and they
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 4fdf5b3..0f996e4 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -256,15 +256,22 @@  void rds_tcp_listen_data_ready(struct sock *sk)
 		ready(sk);
 }
 
-struct socket *rds_tcp_listen_init(struct net *net)
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
 {
-	struct sockaddr_in sin;
 	struct socket *sock = NULL;
+	struct sockaddr_storage ss;
+	struct sockaddr_in6 *sin6;
+	struct sockaddr_in *sin;
+	int addr_len;
 	int ret;
 
-	ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
-	if (ret < 0)
+	ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
+			       IPPROTO_TCP, &sock);
+	if (ret < 0) {
+		rdsdebug("could not create %s listener socket: %d\n",
+			 isv6 ? "IPv6" : "IPv4", ret);
 		goto out;
+	}
 
 	sock->sk->sk_reuse = SK_CAN_REUSE;
 	rds_tcp_nonagle(sock);
@@ -274,13 +281,28 @@  struct socket *rds_tcp_listen_init(struct net *net)
 	sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 
-	sin.sin_family = PF_INET;
-	sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
-	sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+	if (isv6) {
+		sin6 = (struct sockaddr_in6 *)&ss;
+		sin6->sin6_family = PF_INET6;
+		sin6->sin6_addr = in6addr_any;
+		sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+		sin6->sin6_scope_id = 0;
+		sin6->sin6_flowinfo = 0;
+		addr_len = sizeof(*sin6);
+	} else {
+		sin = (struct sockaddr_in *)&ss;
+		sin->sin_family = PF_INET;
+		sin->sin_addr.s_addr = INADDR_ANY;
+		sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
+		addr_len = sizeof(*sin);
+	}
 
-	ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
-	if (ret < 0)
+	ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+	if (ret < 0) {
+		rdsdebug("could not bind %s listener socket: %d\n",
+			 isv6 ? "IPv6" : "IPv4", ret);
 		goto out;
+	}
 
 	ret = sock->ops->listen(sock, 64);
 	if (ret < 0)