diff mbox series

[2/2] migration/rdma: set the REUSEADDR option for destination

Message ID 20220201151136.52157-2-jinpu.wang@ionos.com
State New
Headers show
Series [1/2] migration/rdma: Increase the backlog from 5 to 128 | expand

Commit Message

Jinpu Wang Feb. 1, 2022, 3:11 p.m. UTC
This allow address could be reused to avoid rdma_bind_addr error
out.

Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
---
 migration/rdma.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

Pankaj Gupta Feb. 1, 2022, 6:39 p.m. UTC | #1
> This allow address could be reused to avoid rdma_bind_addr error
> out.

Seems we are proposing to allow multiple connections on same source ip
port pair?
>
> Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> ---
>  migration/rdma.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2e223170d06d..b498ef013c77 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2705,6 +2705,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
>      char ip[40] = "unknown";
>      struct rdma_addrinfo *res, *e;
>      char port_str[16];
> +    int reuse = 1;
>
>      for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
>          rdma->wr_data[idx].control_len = 0;
> @@ -2740,6 +2741,12 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
>          goto err_dest_init_bind_addr;
>      }
>
> +    ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
> +                         &reuse, sizeof reuse);

maybe we can just write '1' directly on the argument list of 'rdma_set_option'.
Assuming reuseaddr does not effect core rdma transport? change seems ok to me.

Thanks,
Pankaj

> +    if (ret) {
> +        ERROR(errp, "Error: could not set REUSEADDR option");
> +        goto err_dest_init_bind_addr;
> +    }
>      for (e = res; e != NULL; e = e->ai_next) {
>          inet_ntop(e->ai_family,
>              &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
> --
> 2.25.1
>
Jinpu Wang Feb. 1, 2022, 8:43 p.m. UTC | #2
On Tue, Feb 1, 2022 at 7:39 PM Pankaj Gupta <pankaj.gupta@ionos.com> wrote:
>
> > This allow address could be reused to avoid rdma_bind_addr error
> > out.
>
> Seems we are proposing to allow multiple connections on same source ip
> port pair?
according to the man page, it's more about the destination side which
is the incoming side.[1]
We hit the error on the migration target when there are many migration
tests in parallel:
"RDMA ERROR: Error: could not rdma_bind_addr!"

[1]https://manpages.debian.org/testing/librdmacm-dev/rdma_set_option.3.en.html
> >
> > Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> > ---
> >  migration/rdma.c | 7 +++++++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/migration/rdma.c b/migration/rdma.c
> > index 2e223170d06d..b498ef013c77 100644
> > --- a/migration/rdma.c
> > +++ b/migration/rdma.c
> > @@ -2705,6 +2705,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
> >      char ip[40] = "unknown";
> >      struct rdma_addrinfo *res, *e;
> >      char port_str[16];
> > +    int reuse = 1;
> >
> >      for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
> >          rdma->wr_data[idx].control_len = 0;
> > @@ -2740,6 +2741,12 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
> >          goto err_dest_init_bind_addr;
> >      }
> >
> > +    ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
> > +                         &reuse, sizeof reuse);
>
> maybe we can just write '1' directly on the argument list of 'rdma_set_option'.
> Assuming reuseaddr does not effect core rdma transport? change seems ok to me.
I feel it's cleaner to do it with a variable than force conversion of
1 to void *.

It's bound to the cm_id which is newly created a few lines above, so
does not affect core rdma transport.

>
> Thanks,
> Pankaj
Thanks for the review!

Jinpu Wang
>
> > +    if (ret) {
> > +        ERROR(errp, "Error: could not set REUSEADDR option");
> > +        goto err_dest_init_bind_addr;
> > +    }
> >      for (e = res; e != NULL; e = e->ai_next) {
> >          inet_ntop(e->ai_family,
> >              &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
> > --
> > 2.25.1
> >
Pankaj Gupta Feb. 2, 2022, 4:57 a.m. UTC | #3
> > > This allow address could be reused to avoid rdma_bind_addr error
> > > out.
> >
> > Seems we are proposing to allow multiple connections on same source ip
> > port pair?
> according to the man page, it's more about the destination side which
> is the incoming side.[1]

By source here I meant generic ip port address pair binding. For this case it is
at destination live migration host.

> We hit the error on the migration target when there are many migration
> tests in parallel:
> "RDMA ERROR: Error: could not rdma_bind_addr!"
o.k. Worth to add this in commit message.

>
> [1]https://manpages.debian.org/testing/librdmacm-dev/rdma_set_option.3.en.html
> > >
> > > Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> > > ---
> > >  migration/rdma.c | 7 +++++++
> > >  1 file changed, 7 insertions(+)
> > >
> > > diff --git a/migration/rdma.c b/migration/rdma.c
> > > index 2e223170d06d..b498ef013c77 100644
> > > --- a/migration/rdma.c
> > > +++ b/migration/rdma.c
> > > @@ -2705,6 +2705,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
> > >      char ip[40] = "unknown";
> > >      struct rdma_addrinfo *res, *e;
> > >      char port_str[16];
> > > +    int reuse = 1;
> > >
> > >      for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
> > >          rdma->wr_data[idx].control_len = 0;
> > > @@ -2740,6 +2741,12 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
> > >          goto err_dest_init_bind_addr;
> > >      }
> > >
> > > +    ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
> > > +                         &reuse, sizeof reuse);
> >
> > maybe we can just write '1' directly on the argument list of 'rdma_set_option'.
> > Assuming reuseaddr does not effect core rdma transport? change seems ok to me.
> I feel it's cleaner to do it with a variable than force conversion of
> 1 to void *.

Adding typecasted 1 seems more cleaner than adding another auto variable.
Will leave this to maintainers to decide.
>
> It's bound to the cm_id which is newly created a few lines above, so
> does not affect core rdma transport.

o.k.
>
> >
> > Thanks,
> > Pankaj
> Thanks for the review!
>
> Jinpu Wang
> >
> > > +    if (ret) {
> > > +        ERROR(errp, "Error: could not set REUSEADDR option");
> > > +        goto err_dest_init_bind_addr;
> > > +    }
> > >      for (e = res; e != NULL; e = e->ai_next) {
> > >          inet_ntop(e->ai_family,
> > >              &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
> > > --
> > > 2.25.1
> > >
Dr. David Alan Gilbert Feb. 2, 2022, 10:15 a.m. UTC | #4
* Jack Wang (jinpu.wang@ionos.com) wrote:
> This allow address could be reused to avoid rdma_bind_addr error
> out.

In what case do you get the error - after a failed migrate and then a
retry?

Dave

> Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> ---
>  migration/rdma.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 2e223170d06d..b498ef013c77 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2705,6 +2705,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
>      char ip[40] = "unknown";
>      struct rdma_addrinfo *res, *e;
>      char port_str[16];
> +    int reuse = 1;
>  
>      for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
>          rdma->wr_data[idx].control_len = 0;
> @@ -2740,6 +2741,12 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
>          goto err_dest_init_bind_addr;
>      }
>  
> +    ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
> +			  &reuse, sizeof reuse);
> +    if (ret) {
> +        ERROR(errp, "Error: could not set REUSEADDR option");
> +        goto err_dest_init_bind_addr;
> +    }
>      for (e = res; e != NULL; e = e->ai_next) {
>          inet_ntop(e->ai_family,
>              &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
> -- 
> 2.25.1
>
Jinpu Wang Feb. 2, 2022, 10:51 a.m. UTC | #5
On Wed, Feb 2, 2022 at 11:15 AM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Jack Wang (jinpu.wang@ionos.com) wrote:
> > This allow address could be reused to avoid rdma_bind_addr error
> > out.
>
> In what case do you get the error - after a failed migrate and then a
> retry?

Yes, what I saw is in case of error, mgmt daemon pick one migration port,
incoming rdma:[::]:8089: RDMA ERROR: Error: could not rdma_bind_addr

Then try another -incoming rdma:[::]:8103, sometime it worked,
sometimes need another try with other ports number.

with this patch, I don't see the error anymore.
>
> Dave
Thanks!
>
> > Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> > ---
> >  migration/rdma.c | 7 +++++++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/migration/rdma.c b/migration/rdma.c
> > index 2e223170d06d..b498ef013c77 100644
> > --- a/migration/rdma.c
> > +++ b/migration/rdma.c
> > @@ -2705,6 +2705,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
> >      char ip[40] = "unknown";
> >      struct rdma_addrinfo *res, *e;
> >      char port_str[16];
> > +    int reuse = 1;
> >
> >      for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
> >          rdma->wr_data[idx].control_len = 0;
> > @@ -2740,6 +2741,12 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
> >          goto err_dest_init_bind_addr;
> >      }
> >
> > +    ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
> > +                       &reuse, sizeof reuse);
> > +    if (ret) {
> > +        ERROR(errp, "Error: could not set REUSEADDR option");
> > +        goto err_dest_init_bind_addr;
> > +    }
> >      for (e = res; e != NULL; e = e->ai_next) {
> >          inet_ntop(e->ai_family,
> >              &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
> > --
> > 2.25.1
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index 2e223170d06d..b498ef013c77 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2705,6 +2705,7 @@  static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
     char ip[40] = "unknown";
     struct rdma_addrinfo *res, *e;
     char port_str[16];
+    int reuse = 1;
 
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         rdma->wr_data[idx].control_len = 0;
@@ -2740,6 +2741,12 @@  static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
         goto err_dest_init_bind_addr;
     }
 
+    ret = rdma_set_option(listen_id, RDMA_OPTION_ID, RDMA_OPTION_ID_REUSEADDR,
+			  &reuse, sizeof reuse);
+    if (ret) {
+        ERROR(errp, "Error: could not set REUSEADDR option");
+        goto err_dest_init_bind_addr;
+    }
     for (e = res; e != NULL; e = e->ai_next) {
         inet_ntop(e->ai_family,
             &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);