diff mbox series

[1/1] net: rds: add service level support in rds-info

Message ID 1566262341-18165-1-git-send-email-yanjun.zhu@oracle.com
State Rejected
Delegated to: David Miller
Headers show
Series [1/1] net: rds: add service level support in rds-info | expand

Commit Message

Zhu Yanjun Aug. 20, 2019, 12:52 a.m. UTC
From IB specific 7.6.5 SERVICE LEVEL, Service Level (SL)
is used to identify different flows within an IBA subnet.
It is carried in the local route header of the packet.

Before this commit, run "rds-info -I". The output is as
below:
"
RDS IB Connections:
 LocalAddr  RemoteAddr Tos SL  LocalDev               RemoteDev
192.2.95.3  192.2.95.1  2   0  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  1   0  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  0   0  fe80::21:28:1a:39  fe80::21:28:10:b9
"
After this commit, the output is as below:
"
RDS IB Connections:
 LocalAddr  RemoteAddr Tos SL  LocalDev               RemoteDev
192.2.95.3  192.2.95.1  2   2  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  1   1  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  0   0  fe80::21:28:1a:39  fe80::21:28:10:b9
"

The commit fe3475af3bdf ("net: rds: add per rds connection cache
statistics") adds cache_allocs in struct rds_info_rdma_connection
as below:
struct rds_info_rdma_connection {
...
        __u32           rdma_mr_max;
        __u32           rdma_mr_size;
        __u8            tos;
        __u32           cache_allocs;
 };
The peer struct in rds-tools of struct rds_info_rdma_connection is as
below:
struct rds_info_rdma_connection {
...
        uint32_t        rdma_mr_max;
        uint32_t        rdma_mr_size;
        uint8_t         tos;
        uint8_t         sl;
        uint32_t        cache_allocs;
};
The difference between userspace and kernel is the member variable sl.
In kernel struct, the member variable sl is missing. This will introduce
risks. So it is necessary to use this commit to avoid this risk.

Fixes: fe3475af3bdf ("net: rds: add per rds connection cache statistics")
CC: Joe Jin <joe.jin@oracle.com>
CC: JUNXIAO_BI <junxiao.bi@oracle.com>
Suggested-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
---
 include/uapi/linux/rds.h |    2 ++
 net/rds/ib.c             |   16 ++++++++++------
 net/rds/ib.h             |    1 +
 net/rds/ib_cm.c          |    3 +++
 net/rds/rdma_transport.c |   10 ++++++++--
 5 files changed, 24 insertions(+), 8 deletions(-)

Comments

Doug Ledford Aug. 20, 2019, 3:28 p.m. UTC | #1
On Mon, 2019-08-19 at 20:52 -0400, Zhu Yanjun wrote:
> diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
> index fd6b5f6..cba368e 100644
> --- a/include/uapi/linux/rds.h
> +++ b/include/uapi/linux/rds.h
> @@ -250,6 +250,7 @@ struct rds_info_rdma_connection {
>         __u32           rdma_mr_max;
>         __u32           rdma_mr_size;
>         __u8            tos;
> +       __u8            sl;
>         __u32           cache_allocs;
>  };
>  
> @@ -265,6 +266,7 @@ struct rds6_info_rdma_connection {
>         __u32           rdma_mr_max;
>         __u32           rdma_mr_size;
>         __u8            tos;
> +       __u8            sl;
>         __u32           cache_allocs;
>  };
>  

This is a user space API break (as was the prior patch mentioned
below)...

> The commit fe3475af3bdf ("net: rds: add per rds connection cache
> statistics") adds cache_allocs in struct rds_info_rdma_connection
> as below:
> struct rds_info_rdma_connection {
> ...
>         __u32           rdma_mr_max;
>         __u32           rdma_mr_size;
>         __u8            tos;
>         __u32           cache_allocs;
>  };
> The peer struct in rds-tools of struct rds_info_rdma_connection is as
> below:
> struct rds_info_rdma_connection {
> ...
>         uint32_t        rdma_mr_max;
>         uint32_t        rdma_mr_size;
>         uint8_t         tos;
>         uint8_t         sl;
>         uint32_t        cache_allocs;
> };

Why are the user space rds tools not using the kernel provided abi
files?

In order to know if this ABI breakage is safe, we need to know what
versions of rds-tools are out in the wild and have their own headers
that we need to match up with.  Are there any versions of rds-tools that
actually use the kernel provided headers?  Are there any other users of
uapi/linux/rds.h besides rds-tools?

Once the kernel and rds-tools package are in sync, rds-tools needs to be
modified to use the kernel header and proper ABI maintenance needs to be
started.
Zhu Yanjun Aug. 21, 2019, 2:10 a.m. UTC | #2
Hiļ¼ŒDoug

My reply is in line.

On 2019/8/20 23:28, Doug Ledford wrote:
> On Mon, 2019-08-19 at 20:52 -0400, Zhu Yanjun wrote:
>> diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
>> index fd6b5f6..cba368e 100644
>> --- a/include/uapi/linux/rds.h
>> +++ b/include/uapi/linux/rds.h
>> @@ -250,6 +250,7 @@ struct rds_info_rdma_connection {
>>          __u32           rdma_mr_max;
>>          __u32           rdma_mr_size;
>>          __u8            tos;
>> +       __u8            sl;
>>          __u32           cache_allocs;
>>   };
>>   
>> @@ -265,6 +266,7 @@ struct rds6_info_rdma_connection {
>>          __u32           rdma_mr_max;
>>          __u32           rdma_mr_size;
>>          __u8            tos;
>> +       __u8            sl;
>>          __u32           cache_allocs;
>>   };
>>   
> This is a user space API break (as was the prior patch mentioned
> below)...
>
>> The commit fe3475af3bdf ("net: rds: add per rds connection cache
>> statistics") adds cache_allocs in struct rds_info_rdma_connection
>> as below:
>> struct rds_info_rdma_connection {
>> ...
>>          __u32           rdma_mr_max;
>>          __u32           rdma_mr_size;
>>          __u8            tos;
>>          __u32           cache_allocs;
>>   };
>> The peer struct in rds-tools of struct rds_info_rdma_connection is as
>> below:
>> struct rds_info_rdma_connection {
>> ...
>>          uint32_t        rdma_mr_max;
>>          uint32_t        rdma_mr_size;
>>          uint8_t         tos;
>>          uint8_t         sl;
>>          uint32_t        cache_allocs;
>> };
> Why are the user space rds tools not using the kernel provided abi
> files?
Perhaps it is a long story.
>
> In order to know if this ABI breakage is safe, we need to know what
> versions of rds-tools are out in the wild and have their own headers
> that we need to match up with.

 From my works in LAB and in the customer's host, rds-tools 2.0.7 is the 
popular

version. Other versions rds-tools are used less.

>    Are there any versions of rds-tools that
> actually use the kernel provided headers?

"the kernel provided headers", do you mean include/uapi/linux/rds.h?

I checked the rds-tools source code. I do not find any version of 
rds-tools us this header files.

> Are there any other users of
> uapi/linux/rds.h besides rds-tools?

Not sure. But in Oracle, there are some rds applications. I am not sure 
whether these rds applications

will use include/uapi/linux/rds.h file or not.

I will investigate it.

>
> Once the kernel and rds-tools package are in sync,

After this commit is merged into mailine, the kernel and rds-tools 
package are in sync.

I will make investigations about rds-tools using the kernel header 
include/uapi/linux/rds.h.

Thanks a lot for your comments.

Zhu Yanjun

>   rds-tools needs to be
> modified to use the kernel header and proper ABI maintenance needs to be
> started.
>
diff mbox series

Patch

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index fd6b5f6..cba368e 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -250,6 +250,7 @@  struct rds_info_rdma_connection {
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
 	__u8		tos;
+	__u8		sl;
 	__u32		cache_allocs;
 };
 
@@ -265,6 +266,7 @@  struct rds6_info_rdma_connection {
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
 	__u8		tos;
+	__u8		sl;
 	__u32		cache_allocs;
 };
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ec05d91..45acab2 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -291,7 +291,7 @@  static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 				    void *buffer)
 {
 	struct rds_info_rdma_connection *iinfo = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -301,15 +301,16 @@  static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
-	iinfo->tos = conn->c_tos;
+	if (ic) {
+		iinfo->tos = conn->c_tos;
+		iinfo->sl = ic->i_sl;
+	}
 
 	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
 	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
-
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
 			       (union ib_gid *)&iinfo->dst_gid);
 
@@ -329,7 +330,7 @@  static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 				     void *buffer)
 {
 	struct rds6_info_rdma_connection *iinfo6 = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -337,6 +338,10 @@  static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo6->src_addr = conn->c_laddr;
 	iinfo6->dst_addr = conn->c_faddr;
+	if (ic) {
+		iinfo6->tos = conn->c_tos;
+		iinfo6->sl = ic->i_sl;
+	}
 
 	memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
 	memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
@@ -344,7 +349,6 @@  static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
 			       (union ib_gid *)&iinfo6->dst_gid);
 		rds_ibdev = ic->rds_ibdev;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 303c6ee..f2b558e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -220,6 +220,7 @@  struct rds_ib_connection {
 	/* Send/Recv vectors */
 	int			i_scq_vector;
 	int			i_rcq_vector;
+	u8			i_sl;
 };
 
 /* This assumes that atomic_t is at least 32 bits */
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index fddaa09..233f136 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -152,6 +152,9 @@  void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 		  RDS_PROTOCOL_MINOR(conn->c_version),
 		  ic->i_flowctl ? ", flow control" : "");
 
+	/* receive sl from the peer */
+	ic->i_sl = ic->i_cm_id->route.path_rec->sl;
+
 	atomic_set(&ic->i_cq_quiesce, 0);
 
 	/* Init rings and fill recv. this needs to wait until protocol
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index ff74c4b..28668ad 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -43,6 +43,9 @@ 
 static struct rdma_cm_id *rds6_rdma_listen_id;
 #endif
 
+/* Per IB specification 7.7.3, service level is a 4-bit field. */
+#define TOS_TO_SL(tos)		((tos) & 0xF)
+
 static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 					 struct rdma_cm_event *event,
 					 bool isv6)
@@ -97,10 +100,13 @@  static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 			struct rds_ib_connection *ibic;
 
 			ibic = conn->c_transport_data;
-			if (ibic && ibic->i_cm_id == cm_id)
+			if (ibic && ibic->i_cm_id == cm_id) {
+				cm_id->route.path_rec[0].sl =
+					TOS_TO_SL(conn->c_tos);
 				ret = trans->cm_initiate_connect(cm_id, isv6);
-			else
+			} else {
 				rds_conn_drop(conn);
+			}
 		}
 		break;