Patchwork [U-Boot] net: nfs: add dynamic wait period

login
register
mail settings
Submitter Matthias Brugger
Date Dec. 11, 2012, 6:14 p.m.
Message ID <1355249656-31222-2-git-send-email-matthias.bgg@gmail.com>
Download mbox | patch
Permalink /patch/205281/
State Accepted
Delegated to: Joe Hershberger
Headers show

Comments

Matthias Brugger - Dec. 11, 2012, 6:14 p.m.
This patch tackles the timeout problem that breaks the boot process
when loading a file over NFS. The patch does two things.

First of all, we just ignore messages that arrive with an rpc_id smaller
than the client id. We interpret these messages as answers to
formerly timed-out messages.

Second, when a timeout occurs we increase the time to wait (to
nfs_timeout + NFS_TIMEOUT * NfsTimeoutCount), so that we do not stress
the server by resending the last message too quickly.

Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 net/nfs.c |   73 +++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 21 deletions(-)
Matthias Brugger - Jan. 26, 2013, 3:23 p.m.
2012/12/11 Matthias Brugger <matthias.bgg@gmail.com>:
> This patch tackles the time out problem which leads to break the
> boot process, when loading file over nfs. The patch does two things.
>
> First of all, we just ignore messages that arrive with a rpc_id smaller
> then the client id. We just interpret this messages as answers to
> formaly timed out messages.
>
> Second, when a time out occurs we double the time to wait, so that we
> do not stress the server resending the last message.

Any comment on the patch?

>
> Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
> ---
>  net/nfs.c |   73 +++++++++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 52 insertions(+), 21 deletions(-)
>
> diff --git a/net/nfs.c b/net/nfs.c
> index 7f2393f..84aeda1 100644
> --- a/net/nfs.c
> +++ b/net/nfs.c
> @@ -37,10 +37,14 @@
>  # define NFS_TIMEOUT CONFIG_NFS_TIMEOUT
>  #endif
>
> +#define NFS_RPC_ERR    1
> +#define NFS_RPC_DROP   124
> +
>  static int fs_mounted;
>  static unsigned long rpc_id;
>  static int nfs_offset = -1;
>  static int nfs_len;
> +static ulong nfs_timeout = NFS_TIMEOUT;
>
>  static char dirfh[NFS_FHSIZE]; /* file handle of directory */
>  static char filefh[NFS_FHSIZE]; /* file handle of kernel image */
> @@ -399,8 +403,10 @@ rpc_lookup_reply(int prog, uchar *pkt, unsigned len)
>
>         debug("%s\n", __func__);
>
> -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> -               return -1;
> +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> +               return -NFS_RPC_ERR;
> +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> +               return -NFS_RPC_DROP;
>
>         if (rpc_pkt.u.reply.rstatus  ||
>             rpc_pkt.u.reply.verifier ||
> @@ -428,8 +434,10 @@ nfs_mount_reply(uchar *pkt, unsigned len)
>
>         memcpy((unsigned char *)&rpc_pkt, pkt, len);
>
> -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> -               return -1;
> +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> +               return -NFS_RPC_ERR;
> +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> +               return -NFS_RPC_DROP;
>
>         if (rpc_pkt.u.reply.rstatus  ||
>             rpc_pkt.u.reply.verifier ||
> @@ -452,8 +460,10 @@ nfs_umountall_reply(uchar *pkt, unsigned len)
>
>         memcpy((unsigned char *)&rpc_pkt, pkt, len);
>
> -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> -               return -1;
> +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> +               return -NFS_RPC_ERR;
> +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> +               return -NFS_RPC_DROP;
>
>         if (rpc_pkt.u.reply.rstatus  ||
>             rpc_pkt.u.reply.verifier ||
> @@ -475,8 +485,10 @@ nfs_lookup_reply(uchar *pkt, unsigned len)
>
>         memcpy((unsigned char *)&rpc_pkt, pkt, len);
>
> -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> -               return -1;
> +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> +               return -NFS_RPC_ERR;
> +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> +               return -NFS_RPC_DROP;
>
>         if (rpc_pkt.u.reply.rstatus  ||
>             rpc_pkt.u.reply.verifier ||
> @@ -499,8 +511,10 @@ nfs_readlink_reply(uchar *pkt, unsigned len)
>
>         memcpy((unsigned char *)&rpc_pkt, pkt, len);
>
> -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> -               return -1;
> +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> +               return -NFS_RPC_ERR;
> +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> +               return -NFS_RPC_DROP;
>
>         if (rpc_pkt.u.reply.rstatus  ||
>             rpc_pkt.u.reply.verifier ||
> @@ -534,8 +548,10 @@ nfs_read_reply(uchar *pkt, unsigned len)
>
>         memcpy((uchar *)&rpc_pkt, pkt, sizeof(rpc_pkt.u.reply));
>
> -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> -               return -1;
> +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> +               return -NFS_RPC_ERR;
> +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> +               return -NFS_RPC_DROP;
>
>         if (rpc_pkt.u.reply.rstatus  ||
>             rpc_pkt.u.reply.verifier ||
> @@ -574,7 +590,7 @@ NfsTimeout(void)
>                 NetStartAgain();
>         } else {
>                 puts("T ");
> -               NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
> +               NetSetTimeout(nfs_timeout + NFS_TIMEOUT * NfsTimeoutCount, NfsTimeout);
>                 NfsSend();
>         }
>  }
> @@ -583,6 +599,7 @@ static void
>  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
>  {
>         int rlen;
> +       int reply;
>
>         debug("%s\n", __func__);
>
> @@ -591,19 +608,24 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
>
>         switch (NfsState) {
>         case STATE_PRCLOOKUP_PROG_MOUNT_REQ:
> -               rpc_lookup_reply(PROG_MOUNT, pkt, len);
> +               if (rpc_lookup_reply(PROG_MOUNT, pkt, len) == -NFS_RPC_DROP)
> +                       break;
>                 NfsState = STATE_PRCLOOKUP_PROG_NFS_REQ;
>                 NfsSend();
>                 break;
>
>         case STATE_PRCLOOKUP_PROG_NFS_REQ:
> -               rpc_lookup_reply(PROG_NFS, pkt, len);
> +               if (rpc_lookup_reply(PROG_NFS, pkt, len) == -NFS_RPC_DROP)
> +                       break;
>                 NfsState = STATE_MOUNT_REQ;
>                 NfsSend();
>                 break;
>
>         case STATE_MOUNT_REQ:
> -               if (nfs_mount_reply(pkt, len)) {
> +               reply = nfs_mount_reply(pkt, len);
> +               if (reply == -NFS_RPC_DROP)
> +                       break;
> +               else if (reply == -NFS_RPC_ERR) {
>                         puts("*** ERROR: Cannot mount\n");
>                         /* just to be sure... */
>                         NfsState = STATE_UMOUNT_REQ;
> @@ -615,7 +637,10 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
>                 break;
>
>         case STATE_UMOUNT_REQ:
> -               if (nfs_umountall_reply(pkt, len)) {
> +               reply = nfs_umountall_reply(pkt, len);
> +               if (reply == -NFS_RPC_DROP)
> +                       break;
> +               else if (reply == -NFS_RPC_ERR) {
>                         puts("*** ERROR: Cannot umount\n");
>                         net_set_state(NETLOOP_FAIL);
>                 } else {
> @@ -625,7 +650,10 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
>                 break;
>
>         case STATE_LOOKUP_REQ:
> -               if (nfs_lookup_reply(pkt, len)) {
> +               reply = nfs_lookup_reply(pkt, len);
> +               if (reply == -NFS_RPC_DROP)
> +                       break;
> +               else if (reply == -NFS_RPC_ERR) {
>                         puts("*** ERROR: File lookup fail\n");
>                         NfsState = STATE_UMOUNT_REQ;
>                         NfsSend();
> @@ -638,7 +666,10 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
>                 break;
>
>         case STATE_READLINK_REQ:
> -               if (nfs_readlink_reply(pkt, len)) {
> +               reply = nfs_readlink_reply(pkt, len);
> +               if (reply == -NFS_RPC_DROP)
> +                       break;
> +               else if (reply == -NFS_RPC_ERR) {
>                         puts("*** ERROR: Symlink fail\n");
>                         NfsState = STATE_UMOUNT_REQ;
>                         NfsSend();
> @@ -654,7 +685,7 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
>
>         case STATE_READ_REQ:
>                 rlen = nfs_read_reply(pkt, len);
> -               NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
> +               NetSetTimeout(nfs_timeout, NfsTimeout);
>                 if (rlen > 0) {
>                         nfs_offset += rlen;
>                         NfsSend();
> @@ -738,7 +769,7 @@ NfsStart(void)
>         printf("\nLoad address: 0x%lx\n"
>                 "Loading: *\b", load_addr);
>
> -       NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
> +       NetSetTimeout(nfs_timeout, NfsTimeout);
>         net_set_udp_handler(NfsHandler);
>
>         NfsTimeoutCount = 0;
> --
> 1.7.9.5
>

Hi Joe and Wolfgang,

any comment on this?

Best regards,
Matthias
Enric Balletbò i Serra - April 12, 2013, 8:08 a.m.
Hi all,

2013/1/26 Matthias Brugger <matthias.bgg@gmail.com>

> 2012/12/11 Matthias Brugger <matthias.bgg@gmail.com>:
> > This patch tackles the time out problem which leads to break the
> > boot process, when loading file over nfs. The patch does two things.
> >
> > First of all, we just ignore messages that arrive with a rpc_id smaller
> > then the client id. We just interpret this messages as answers to
> > formaly timed out messages.
> >
> > Second, when a time out occurs we double the time to wait, so that we
> > do not stress the server resending the last message.
>
> Any comment on the patch?
>
> >
> > Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
> > ---
> >  net/nfs.c |   73
> +++++++++++++++++++++++++++++++++++++++++++------------------
> >  1 file changed, 52 insertions(+), 21 deletions(-)
> >
> > diff --git a/net/nfs.c b/net/nfs.c
> > index 7f2393f..84aeda1 100644
> > --- a/net/nfs.c
> > +++ b/net/nfs.c
> > @@ -37,10 +37,14 @@
> >  # define NFS_TIMEOUT CONFIG_NFS_TIMEOUT
> >  #endif
> >
> > +#define NFS_RPC_ERR    1
> > +#define NFS_RPC_DROP   124
> > +
> >  static int fs_mounted;
> >  static unsigned long rpc_id;
> >  static int nfs_offset = -1;
> >  static int nfs_len;
> > +static ulong nfs_timeout = NFS_TIMEOUT;
> >
> >  static char dirfh[NFS_FHSIZE]; /* file handle of directory */
> >  static char filefh[NFS_FHSIZE]; /* file handle of kernel image */
> > @@ -399,8 +403,10 @@ rpc_lookup_reply(int prog, uchar *pkt, unsigned len)
> >
> >         debug("%s\n", __func__);
> >
> > -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> > -               return -1;
> > +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> > +               return -NFS_RPC_ERR;
> > +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> > +               return -NFS_RPC_DROP;
> >
> >         if (rpc_pkt.u.reply.rstatus  ||
> >             rpc_pkt.u.reply.verifier ||
> > @@ -428,8 +434,10 @@ nfs_mount_reply(uchar *pkt, unsigned len)
> >
> >         memcpy((unsigned char *)&rpc_pkt, pkt, len);
> >
> > -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> > -               return -1;
> > +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> > +               return -NFS_RPC_ERR;
> > +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> > +               return -NFS_RPC_DROP;
> >
> >         if (rpc_pkt.u.reply.rstatus  ||
> >             rpc_pkt.u.reply.verifier ||
> > @@ -452,8 +460,10 @@ nfs_umountall_reply(uchar *pkt, unsigned len)
> >
> >         memcpy((unsigned char *)&rpc_pkt, pkt, len);
> >
> > -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> > -               return -1;
> > +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> > +               return -NFS_RPC_ERR;
> > +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> > +               return -NFS_RPC_DROP;
> >
> >         if (rpc_pkt.u.reply.rstatus  ||
> >             rpc_pkt.u.reply.verifier ||
> > @@ -475,8 +485,10 @@ nfs_lookup_reply(uchar *pkt, unsigned len)
> >
> >         memcpy((unsigned char *)&rpc_pkt, pkt, len);
> >
> > -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> > -               return -1;
> > +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> > +               return -NFS_RPC_ERR;
> > +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> > +               return -NFS_RPC_DROP;
> >
> >         if (rpc_pkt.u.reply.rstatus  ||
> >             rpc_pkt.u.reply.verifier ||
> > @@ -499,8 +511,10 @@ nfs_readlink_reply(uchar *pkt, unsigned len)
> >
> >         memcpy((unsigned char *)&rpc_pkt, pkt, len);
> >
> > -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> > -               return -1;
> > +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> > +               return -NFS_RPC_ERR;
> > +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> > +               return -NFS_RPC_DROP;
> >
> >         if (rpc_pkt.u.reply.rstatus  ||
> >             rpc_pkt.u.reply.verifier ||
> > @@ -534,8 +548,10 @@ nfs_read_reply(uchar *pkt, unsigned len)
> >
> >         memcpy((uchar *)&rpc_pkt, pkt, sizeof(rpc_pkt.u.reply));
> >
> > -       if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
> > -               return -1;
> > +       if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
> > +               return -NFS_RPC_ERR;
> > +       else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
> > +               return -NFS_RPC_DROP;
> >
> >         if (rpc_pkt.u.reply.rstatus  ||
> >             rpc_pkt.u.reply.verifier ||
> > @@ -574,7 +590,7 @@ NfsTimeout(void)
> >                 NetStartAgain();
> >         } else {
> >                 puts("T ");
> > -               NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
> > +               NetSetTimeout(nfs_timeout + NFS_TIMEOUT *
> NfsTimeoutCount, NfsTimeout);
> >                 NfsSend();
> >         }
> >  }
> > @@ -583,6 +599,7 @@ static void
> >  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src,
> unsigned len)
> >  {
> >         int rlen;
> > +       int reply;
> >
> >         debug("%s\n", __func__);
> >
> > @@ -591,19 +608,24 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t
> sip, unsigned src, unsigned len)
> >
> >         switch (NfsState) {
> >         case STATE_PRCLOOKUP_PROG_MOUNT_REQ:
> > -               rpc_lookup_reply(PROG_MOUNT, pkt, len);
> > +               if (rpc_lookup_reply(PROG_MOUNT, pkt, len) ==
> -NFS_RPC_DROP)
> > +                       break;
> >                 NfsState = STATE_PRCLOOKUP_PROG_NFS_REQ;
> >                 NfsSend();
> >                 break;
> >
> >         case STATE_PRCLOOKUP_PROG_NFS_REQ:
> > -               rpc_lookup_reply(PROG_NFS, pkt, len);
> > +               if (rpc_lookup_reply(PROG_NFS, pkt, len) ==
> -NFS_RPC_DROP)
> > +                       break;
> >                 NfsState = STATE_MOUNT_REQ;
> >                 NfsSend();
> >                 break;
> >
> >         case STATE_MOUNT_REQ:
> > -               if (nfs_mount_reply(pkt, len)) {
> > +               reply = nfs_mount_reply(pkt, len);
> > +               if (reply == -NFS_RPC_DROP)
> > +                       break;
> > +               else if (reply == -NFS_RPC_ERR) {
> >                         puts("*** ERROR: Cannot mount\n");
> >                         /* just to be sure... */
> >                         NfsState = STATE_UMOUNT_REQ;
> > @@ -615,7 +637,10 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip,
> unsigned src, unsigned len)
> >                 break;
> >
> >         case STATE_UMOUNT_REQ:
> > -               if (nfs_umountall_reply(pkt, len)) {
> > +               reply = nfs_umountall_reply(pkt, len);
> > +               if (reply == -NFS_RPC_DROP)
> > +                       break;
> > +               else if (reply == -NFS_RPC_ERR) {
> >                         puts("*** ERROR: Cannot umount\n");
> >                         net_set_state(NETLOOP_FAIL);
> >                 } else {
> > @@ -625,7 +650,10 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip,
> unsigned src, unsigned len)
> >                 break;
> >
> >         case STATE_LOOKUP_REQ:
> > -               if (nfs_lookup_reply(pkt, len)) {
> > +               reply = nfs_lookup_reply(pkt, len);
> > +               if (reply == -NFS_RPC_DROP)
> > +                       break;
> > +               else if (reply == -NFS_RPC_ERR) {
> >                         puts("*** ERROR: File lookup fail\n");
> >                         NfsState = STATE_UMOUNT_REQ;
> >                         NfsSend();
> > @@ -638,7 +666,10 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip,
> unsigned src, unsigned len)
> >                 break;
> >
> >         case STATE_READLINK_REQ:
> > -               if (nfs_readlink_reply(pkt, len)) {
> > +               reply = nfs_readlink_reply(pkt, len);
> > +               if (reply == -NFS_RPC_DROP)
> > +                       break;
> > +               else if (reply == -NFS_RPC_ERR) {
> >                         puts("*** ERROR: Symlink fail\n");
> >                         NfsState = STATE_UMOUNT_REQ;
> >                         NfsSend();
> > @@ -654,7 +685,7 @@ NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip,
> unsigned src, unsigned len)
> >
> >         case STATE_READ_REQ:
> >                 rlen = nfs_read_reply(pkt, len);
> > -               NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
> > +               NetSetTimeout(nfs_timeout, NfsTimeout);
> >                 if (rlen > 0) {
> >                         nfs_offset += rlen;
> >                         NfsSend();
> > @@ -738,7 +769,7 @@ NfsStart(void)
> >         printf("\nLoad address: 0x%lx\n"
> >                 "Loading: *\b", load_addr);
> >
> > -       NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
> > +       NetSetTimeout(nfs_timeout, NfsTimeout);
> >         net_set_udp_handler(NfsHandler);
> >
> >         NfsTimeoutCount = 0;
> > --
> > 1.7.9.5
> >
>
> Hi Joe and Wolfgang,
>
> any comment on this?
>

I had a problem when I tried to load via NFS. First I tried to load the
dtb image via NFS, then I tried to load the kernel image, but it
doesn't work and the file is never loaded.

Applying this patch solved the problem, but it seems nobody acked or answered
Matthias. Is this patch the proper fix? If so, would it be possible to
apply it? If not, are there any comments on how to solve this problem?

Best regards,
    Enric


>
> Best regards,
> Matthias
>
> --
> ---
> motzblog.wordpress.com
> _______________________________________________
> U-Boot mailing list
> U-Boot@lists.denx.de
> http://lists.denx.de/mailman/listinfo/u-boot
>
Joe Hershberger - April 12, 2013, 1:56 p.m.
Hi Matthias and Enric,

On Fri, Apr 12, 2013 at 3:08 AM, Enric Balletbo Serra
<eballetbo@gmail.com> wrote:
> Hi all,
>
> 2013/1/26 Matthias Brugger <matthias.bgg@gmail.com>
>>
>> 2012/12/11 Matthias Brugger <matthias.bgg@gmail.com>:
>> > This patch tackles the time out problem which leads to break the
>> > boot process, when loading file over nfs. The patch does two things.
>> >
>> > First of all, we just ignore messages that arrive with a rpc_id smaller
>> > then the client id. We just interpret this messages as answers to
>> > formaly timed out messages.
>> >
>> > Second, when a time out occurs we double the time to wait, so that we
>> > do not stress the server resending the last message.
>>
>> Any comment on the patch?
>>
>> Hi Joe and Wolfgang,
>>
>> any comment on this?
>
>
> I had a problem when I tried to load via NFS. Firstly I tried to load the
> dtb image via NFS, secondly I tried to load the kernel image but if it
> doesn't work and the file is never loaded.
>
> Applying this patch solved the problem, but seems nobody acked or answered
> Matthias. Is this patch the proper fix ? In that case it's possible to apply
> ? If not, any comments how to solve this problem.
>
> Best regards,
>     Enric

I apologize for the tardiness of dealing with patches this release
cycle.  I've been swamped at work.  I'll get to these soon.

Enric, would you like to send a Tested-by?

Thanks,
-Joe
Enric Balletbò i Serra - April 12, 2013, 2:11 p.m.
Hi Joe,

Thanks for the answer.


2013/4/12 Joe Hershberger <joe.hershberger@gmail.com>

> Hi Matthias and Enric,
>
> On Fri, Apr 12, 2013 at 3:08 AM, Enric Balletbo Serra
> <eballetbo@gmail.com> wrote:
> > Hi all,
> >
> > 2013/1/26 Matthias Brugger <matthias.bgg@gmail.com>
> >>
> >> 2012/12/11 Matthias Brugger <matthias.bgg@gmail.com>:
> >> > This patch tackles the time out problem which leads to break the
> >> > boot process, when loading file over nfs. The patch does two things.
> >> >
> >> > First of all, we just ignore messages that arrive with a rpc_id
> smaller
> >> > then the client id. We just interpret this messages as answers to
> >> > formaly timed out messages.
> >> >
> >> > Second, when a time out occurs we double the time to wait, so that we
> >> > do not stress the server resending the last message.
> >>
> >> Any comment on the patch?
> >>
> >> Hi Joe and Wolfgang,
> >>
> >> any comment on this?
> >
> >
> > I had a problem when I tried to load via NFS. Firstly I tried to load the
> > dtb image via NFS, secondly I tried to load the kernel image but if it
> > doesn't work and the file is never loaded.
> >
> > Applying this patch solved the problem, but seems nobody acked or
> answered
> > Matthias. Is this patch the proper fix ? In that case it's possible to
> apply
> > ? If not, any comments how to solve this problem.
> >
> > Best regards,
> >     Enric
>
> I apologize for the tardiness of dealing with patches this release
> cycle.  I've been swamped at work.  I'll get to these soon.
>
> Enric, would you like to send a Tested-by?
>
> Thanks,
> -Joe
>

I tested this patch with an IGEPv2 board and an IGEP COM AQUILA.

Tested-by: Enric Balletbo i Serra <eballetbo@gmail.com>
Joe Hershberger - July 8, 2013, 4:01 p.m.
On Tue, Dec 11, 2012 at 12:14 PM, Matthias Brugger
<matthias.bgg@gmail.com> wrote:
> This patch tackles the time out problem which leads to break the
> boot process, when loading file over nfs. The patch does two things.
>
> First of all, we just ignore messages that arrive with a rpc_id smaller
> then the client id. We just interpret this messages as answers to
> formaly timed out messages.
>
> Second, when a time out occurs we double the time to wait, so that we
> do not stress the server resending the last message.
>
> Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>

Applied, Thanks.
-Joe

Patch

diff --git a/net/nfs.c b/net/nfs.c
index 7f2393f..84aeda1 100644
--- a/net/nfs.c
+++ b/net/nfs.c
@@ -37,10 +37,14 @@ 
 # define NFS_TIMEOUT CONFIG_NFS_TIMEOUT
 #endif
 
+#define NFS_RPC_ERR	1
+#define NFS_RPC_DROP	124
+
 static int fs_mounted;
 static unsigned long rpc_id;
 static int nfs_offset = -1;
 static int nfs_len;
+static ulong nfs_timeout = NFS_TIMEOUT;
 
 static char dirfh[NFS_FHSIZE];	/* file handle of directory */
 static char filefh[NFS_FHSIZE]; /* file handle of kernel image */
@@ -399,8 +403,10 @@  rpc_lookup_reply(int prog, uchar *pkt, unsigned len)
 
 	debug("%s\n", __func__);
 
-	if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
-		return -1;
+	if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
+		return -NFS_RPC_ERR;
+	else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
+		return -NFS_RPC_DROP;
 
 	if (rpc_pkt.u.reply.rstatus  ||
 	    rpc_pkt.u.reply.verifier ||
@@ -428,8 +434,10 @@  nfs_mount_reply(uchar *pkt, unsigned len)
 
 	memcpy((unsigned char *)&rpc_pkt, pkt, len);
 
-	if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
-		return -1;
+	if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
+		return -NFS_RPC_ERR;
+	else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
+		return -NFS_RPC_DROP;
 
 	if (rpc_pkt.u.reply.rstatus  ||
 	    rpc_pkt.u.reply.verifier ||
@@ -452,8 +460,10 @@  nfs_umountall_reply(uchar *pkt, unsigned len)
 
 	memcpy((unsigned char *)&rpc_pkt, pkt, len);
 
-	if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
-		return -1;
+	if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
+		return -NFS_RPC_ERR;
+	else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
+		return -NFS_RPC_DROP;
 
 	if (rpc_pkt.u.reply.rstatus  ||
 	    rpc_pkt.u.reply.verifier ||
@@ -475,8 +485,10 @@  nfs_lookup_reply(uchar *pkt, unsigned len)
 
 	memcpy((unsigned char *)&rpc_pkt, pkt, len);
 
-	if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
-		return -1;
+	if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
+		return -NFS_RPC_ERR;
+	else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
+		return -NFS_RPC_DROP;
 
 	if (rpc_pkt.u.reply.rstatus  ||
 	    rpc_pkt.u.reply.verifier ||
@@ -499,8 +511,10 @@  nfs_readlink_reply(uchar *pkt, unsigned len)
 
 	memcpy((unsigned char *)&rpc_pkt, pkt, len);
 
-	if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
-		return -1;
+	if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
+		return -NFS_RPC_ERR;
+	else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
+		return -NFS_RPC_DROP;
 
 	if (rpc_pkt.u.reply.rstatus  ||
 	    rpc_pkt.u.reply.verifier ||
@@ -534,8 +548,10 @@  nfs_read_reply(uchar *pkt, unsigned len)
 
 	memcpy((uchar *)&rpc_pkt, pkt, sizeof(rpc_pkt.u.reply));
 
-	if (ntohl(rpc_pkt.u.reply.id) != rpc_id)
-		return -1;
+	if (ntohl(rpc_pkt.u.reply.id) > rpc_id)
+		return -NFS_RPC_ERR;
+	else if (ntohl(rpc_pkt.u.reply.id) < rpc_id)
+		return -NFS_RPC_DROP;
 
 	if (rpc_pkt.u.reply.rstatus  ||
 	    rpc_pkt.u.reply.verifier ||
@@ -574,7 +590,7 @@  NfsTimeout(void)
 		NetStartAgain();
 	} else {
 		puts("T ");
-		NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
+		NetSetTimeout(nfs_timeout + NFS_TIMEOUT * NfsTimeoutCount, NfsTimeout);
 		NfsSend();
 	}
 }
@@ -583,6 +599,7 @@  static void
 NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
 {
 	int rlen;
+	int reply;
 
 	debug("%s\n", __func__);
 
@@ -591,19 +608,24 @@  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
 
 	switch (NfsState) {
 	case STATE_PRCLOOKUP_PROG_MOUNT_REQ:
-		rpc_lookup_reply(PROG_MOUNT, pkt, len);
+		if (rpc_lookup_reply(PROG_MOUNT, pkt, len) == -NFS_RPC_DROP)
+			break;
 		NfsState = STATE_PRCLOOKUP_PROG_NFS_REQ;
 		NfsSend();
 		break;
 
 	case STATE_PRCLOOKUP_PROG_NFS_REQ:
-		rpc_lookup_reply(PROG_NFS, pkt, len);
+		if (rpc_lookup_reply(PROG_NFS, pkt, len) == -NFS_RPC_DROP)
+			break;
 		NfsState = STATE_MOUNT_REQ;
 		NfsSend();
 		break;
 
 	case STATE_MOUNT_REQ:
-		if (nfs_mount_reply(pkt, len)) {
+		reply = nfs_mount_reply(pkt, len);
+		if (reply == -NFS_RPC_DROP)
+			break;
+		else if (reply == -NFS_RPC_ERR) {
 			puts("*** ERROR: Cannot mount\n");
 			/* just to be sure... */
 			NfsState = STATE_UMOUNT_REQ;
@@ -615,7 +637,10 @@  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
 		break;
 
 	case STATE_UMOUNT_REQ:
-		if (nfs_umountall_reply(pkt, len)) {
+		reply = nfs_umountall_reply(pkt, len);
+		if (reply == -NFS_RPC_DROP)
+			break;
+		else if (reply == -NFS_RPC_ERR) {
 			puts("*** ERROR: Cannot umount\n");
 			net_set_state(NETLOOP_FAIL);
 		} else {
@@ -625,7 +650,10 @@  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
 		break;
 
 	case STATE_LOOKUP_REQ:
-		if (nfs_lookup_reply(pkt, len)) {
+		reply = nfs_lookup_reply(pkt, len);
+		if (reply == -NFS_RPC_DROP)
+			break;
+		else if (reply == -NFS_RPC_ERR) {
 			puts("*** ERROR: File lookup fail\n");
 			NfsState = STATE_UMOUNT_REQ;
 			NfsSend();
@@ -638,7 +666,10 @@  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
 		break;
 
 	case STATE_READLINK_REQ:
-		if (nfs_readlink_reply(pkt, len)) {
+		reply = nfs_readlink_reply(pkt, len);
+		if (reply == -NFS_RPC_DROP)
+			break;
+		else if (reply == -NFS_RPC_ERR) {
 			puts("*** ERROR: Symlink fail\n");
 			NfsState = STATE_UMOUNT_REQ;
 			NfsSend();
@@ -654,7 +685,7 @@  NfsHandler(uchar *pkt, unsigned dest, IPaddr_t sip, unsigned src, unsigned len)
 
 	case STATE_READ_REQ:
 		rlen = nfs_read_reply(pkt, len);
-		NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
+		NetSetTimeout(nfs_timeout, NfsTimeout);
 		if (rlen > 0) {
 			nfs_offset += rlen;
 			NfsSend();
@@ -738,7 +769,7 @@  NfsStart(void)
 	printf("\nLoad address: 0x%lx\n"
 		"Loading: *\b", load_addr);
 
-	NetSetTimeout(NFS_TIMEOUT, NfsTimeout);
+	NetSetTimeout(nfs_timeout, NfsTimeout);
 	net_set_udp_handler(NfsHandler);
 
 	NfsTimeoutCount = 0;