Message ID | 20191029233815.12391-2-matthew.ruffell@canonical.com |
---|---|
State | New |
Headers | show |
Series | NFSv4.1: Interrupted connections cause high bandwidth RPC ping-pong between client and server | expand |
On 30.10.19 00:38, Matthew Ruffell wrote: > From: Trond Myklebust <trond.myklebust@hammerspace.com> > > BugLink: https://bugs.launchpad.net/bugs/1828978 > > A 'false retry' in NFSv4.1 occurs when the client attempts to transmit a > new RPC call using a slot+sequence number combination that references an > already cached one. Currently, the Linux NFS client will do this if a > user process interrupts an RPC call that is in progress. > The problem with doing so is that we defeat the main mechanism used by > the server to differentiate between a new call and a replayed one. Even > if the server is able to perfectly cache the arguments of the old call, > it cannot know if the client intended to replay or send a new call. > > The obvious fix is to bump the sequence number pre-emptively if an > RPC call is interrupted, but in order to deal with the corner cases > where the interrupted call is not actually received and processed by > the server, we need to interpret the error NFS4ERR_SEQ_MISORDERED > as a sign that we need to either wait or locate a correct sequence > number that lies between the value we sent, and the last value that > was acked by a SEQUENCE call on that slot. > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > Tested-by: Jason Tibbitts <tibbs@math.uh.edu> > (cherry picked from commit 3453d5708b33efe76f40eca1c0ed60923094b971) > Signed-off-by: Matthew Ruffell <matthew.ruffell@canonical.com> Acked-by: Stefan Bader <stefan.bader@canonical.com> > --- > fs/nfs/nfs4proc.c | 105 ++++++++++++++++++++----------------------- > fs/nfs/nfs4session.c | 5 ++- > fs/nfs/nfs4session.h | 5 ++- > 3 files changed, 55 insertions(+), 60 deletions(-) > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index 0d4270867ddb..cd3bb830d8a1 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -730,13 +730,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) > res->sr_slot = NULL; > } > > +static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, > + u32 seqnr) > +{ > + if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) > + slot->seq_nr_highest_sent = seqnr; > +} > +static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, > + u32 seqnr) > +{ > + slot->seq_nr_highest_sent = seqnr; > + slot->seq_nr_last_acked = seqnr; > +} > + > static int nfs41_sequence_process(struct rpc_task *task, > struct nfs4_sequence_res *res) > { > struct nfs4_session *session; > struct nfs4_slot *slot = res->sr_slot; > struct nfs_client *clp; > - bool interrupted = false; > int ret = 1; > > if (slot == NULL) > @@ -747,16 +759,12 @@ static int nfs41_sequence_process(struct rpc_task *task, > > session = slot->table->session; > > - if (slot->interrupted) { > - if (res->sr_status != -NFS4ERR_DELAY) > - slot->interrupted = 0; > - interrupted = true; > - } > - > trace_nfs4_sequence_done(session, res); > /* Check the SEQUENCE operation status */ > switch (res->sr_status) { > case 0: > + /* Mark this sequence number as having been acked */ > + nfs4_slot_sequence_acked(slot, slot->seq_nr); > /* Update the slot's sequence and clientid lease timer */ > slot->seq_done = 1; > clp = session->clp; > @@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task, > * sr_status remains 1 if an RPC level error occurred. > * The server may or may not have processed the sequence > * operation.. > - * Mark the slot as having hosted an interrupted RPC call. > */ > - slot->interrupted = 1; > + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); > + slot->seq_done = 1; > goto out; > case -NFS4ERR_DELAY: > /* The server detected a resend of the RPC call and > @@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task, > __func__, > slot->slot_nr, > slot->seq_nr); > + nfs4_slot_sequence_acked(slot, slot->seq_nr); > goto out_retry; > case -NFS4ERR_RETRY_UNCACHED_REP: > case -NFS4ERR_SEQ_FALSE_RETRY: > @@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task, > * The server thinks we tried to replay a request. > * Retry the call after bumping the sequence ID. > */ > + nfs4_slot_sequence_acked(slot, slot->seq_nr); > goto retry_new_seq; > case -NFS4ERR_BADSLOT: > /* > @@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task, > goto session_recover; > goto retry_nowait; > case -NFS4ERR_SEQ_MISORDERED: > + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); > /* > - * Was the last operation on this sequence interrupted? > - * If so, retry after bumping the sequence number. > + * Were one or more calls using this slot interrupted? > + * If the server never received the request, then our > + * transmitted slot sequence number may be too high. > */ > - if (interrupted) > - goto retry_new_seq; > - /* > - * Could this slot have been previously retired? > - * If so, then the server may be expecting seq_nr = 1! > - */ > - if (slot->seq_nr != 1) { > - slot->seq_nr = 1; > + if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { > + slot->seq_nr--; > goto retry_nowait; > } > - goto session_recover; > + /* > + * RFC5661: > + * A retry might be sent while the original request is > + * still in progress on the replier. The replier SHOULD > + * deal with the issue by returning NFS4ERR_DELAY as the > + * reply to SEQUENCE or CB_SEQUENCE operation, but > + * implementations MAY return NFS4ERR_SEQ_MISORDERED. > + * > + * Restart the search after a delay. > + */ > + slot->seq_nr = slot->seq_nr_highest_sent; > + goto out_retry; > default: > /* Just update the slot sequence no. */ > slot->seq_done = 1; > @@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { > .rpc_call_done = nfs41_call_sync_done, > }; > > -static void > -nfs4_sequence_process_interrupted(struct nfs_client *client, > - struct nfs4_slot *slot, const struct cred *cred) > -{ > - struct rpc_task *task; > - > - task = _nfs41_proc_sequence(client, cred, slot, true); > - if (!IS_ERR(task)) > - rpc_put_task_async(task); > -} > - > #else /* !CONFIG_NFS_V4_1 */ > > static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) > @@ -937,14 +943,6 @@ int nfs4_sequence_done(struct rpc_task *task, > } > EXPORT_SYMBOL_GPL(nfs4_sequence_done); > > -static void > -nfs4_sequence_process_interrupted(struct nfs_client *client, > - struct nfs4_slot *slot, const struct cred *cred) > -{ > - WARN_ON_ONCE(1); > - slot->interrupted = 0; > -} > - > #endif /* !CONFIG_NFS_V4_1 */ > > static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) > @@ -985,26 +983,19 @@ int nfs4_setup_sequence(struct nfs_client *client, > task->tk_timeout = 0; > } > > - for (;;) { > - spin_lock(&tbl->slot_tbl_lock); > - /* The state manager will wait until the slot table is empty */ > - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) > - goto out_sleep; > - > - slot = nfs4_alloc_slot(tbl); > - if (IS_ERR(slot)) { > - /* Try again in 1/4 second */ > - if (slot == ERR_PTR(-ENOMEM)) > - task->tk_timeout = HZ >> 2; > - goto out_sleep; > - } > - spin_unlock(&tbl->slot_tbl_lock); > + spin_lock(&tbl->slot_tbl_lock); > + /* The state manager will wait until the slot table is empty */ > + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) > + goto out_sleep; > > - if (likely(!slot->interrupted)) > - break; > - nfs4_sequence_process_interrupted(client, > - slot, task->tk_msg.rpc_cred); > + slot = nfs4_alloc_slot(tbl); > + if (IS_ERR(slot)) { > + /* Try again in 1/4 second */ > + if (slot == ERR_PTR(-ENOMEM)) > + task->tk_timeout = HZ >> 2; > + goto out_sleep; > } > + spin_unlock(&tbl->slot_tbl_lock); > > nfs4_sequence_attach_slot(args, res, slot); > > diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c > index a5489d70a724..39962c19744f 100644 > --- a/fs/nfs/nfs4session.c > +++ b/fs/nfs/nfs4session.c > @@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, > slot->table = tbl; > slot->slot_nr = slotid; > slot->seq_nr = seq_init; > + slot->seq_nr_highest_sent = seq_init; > + slot->seq_nr_last_acked = seq_init - 1; > } > return slot; > } > @@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, > p = &tbl->slots; > while (*p) { > (*p)->seq_nr = ivalue; > - (*p)->interrupted = 0; > + (*p)->seq_nr_highest_sent = ivalue; > + (*p)->seq_nr_last_acked = ivalue - 1; > p = &(*p)->next; > } > tbl->highest_used_slotid = NFS4_NO_SLOT; > diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h > index 3c550f297561..230509b77121 100644 > --- a/fs/nfs/nfs4session.h > +++ b/fs/nfs/nfs4session.h > @@ -23,8 +23,9 @@ struct nfs4_slot { > unsigned long generation; > u32 slot_nr; > u32 seq_nr; > - unsigned int interrupted : 1, > - privileged : 1, > + u32 seq_nr_last_acked; > + u32 seq_nr_highest_sent; > + unsigned int privileged : 1, > seq_done : 1; > }; > >
On Wed, Oct 30, 2019 at 12:38:15PM +1300, Matthew Ruffell wrote: > From: Trond Myklebust <trond.myklebust@hammerspace.com> > > BugLink: https://bugs.launchpad.net/bugs/1828978 > > A 'false retry' in NFSv4.1 occurs when the client attempts to transmit a > new RPC call using a slot+sequence number combination that references an > already cached one. Currently, the Linux NFS client will do this if a > user process interrupts an RPC call that is in progress. > The problem with doing so is that we defeat the main mechanism used by > the server to differentiate between a new call and a replayed one. Even > if the server is able to perfectly cache the arguments of the old call, > it cannot know if the client intended to replay or send a new call. > > The obvious fix is to bump the sequence number pre-emptively if an > RPC call is interrupted, but in order to deal with the corner cases > where the interrupted call is not actually received and processed by > the server, we need to interpret the error NFS4ERR_SEQ_MISORDERED > as a sign that we need to either wait or locate a correct sequence > number that lies between the value we sent, and the last value that > was acked by a SEQUENCE call on that slot. > > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> > Tested-by: Jason Tibbitts <tibbs@math.uh.edu> > (cherry picked from commit 3453d5708b33efe76f40eca1c0ed60923094b971) > Signed-off-by: Matthew Ruffell <matthew.ruffell@canonical.com> > --- > fs/nfs/nfs4proc.c | 105 ++++++++++++++++++++----------------------- > fs/nfs/nfs4session.c | 5 ++- > fs/nfs/nfs4session.h | 5 ++- > 3 files changed, 55 insertions(+), 60 deletions(-) > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index 0d4270867ddb..cd3bb830d8a1 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -730,13 +730,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) > res->sr_slot = NULL; > } > > +static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, > + u32 seqnr) > +{ > + if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) > + slot->seq_nr_highest_sent = seqnr; > +} > +static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, > + u32 seqnr) > +{ > + slot->seq_nr_highest_sent = seqnr; > + slot->seq_nr_last_acked = seqnr; > +} > + > static int nfs41_sequence_process(struct rpc_task *task, > struct nfs4_sequence_res *res) > { > struct nfs4_session *session; > struct nfs4_slot *slot = res->sr_slot; > struct nfs_client *clp; > - bool interrupted = false; > int ret = 1; > > if (slot == NULL) > @@ -747,16 +759,12 @@ static int nfs41_sequence_process(struct rpc_task *task, > > session = slot->table->session; > > - if (slot->interrupted) { > - if (res->sr_status != -NFS4ERR_DELAY) > - slot->interrupted = 0; > - interrupted = true; > - } > - > trace_nfs4_sequence_done(session, res); > /* Check the SEQUENCE operation status */ > switch (res->sr_status) { > case 0: > + /* Mark this sequence number as having been acked */ > + nfs4_slot_sequence_acked(slot, slot->seq_nr); > /* Update the slot's sequence and clientid lease timer */ > slot->seq_done = 1; > clp = session->clp; > @@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task, > * sr_status remains 1 if an RPC level error occurred. > * The server may or may not have processed the sequence > * operation.. > - * Mark the slot as having hosted an interrupted RPC call. > */ > - slot->interrupted = 1; > + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); > + slot->seq_done = 1; > goto out; > case -NFS4ERR_DELAY: > /* The server detected a resend of the RPC call and > @@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task, > __func__, > slot->slot_nr, > slot->seq_nr); > + nfs4_slot_sequence_acked(slot, slot->seq_nr); > goto out_retry; > case -NFS4ERR_RETRY_UNCACHED_REP: > case -NFS4ERR_SEQ_FALSE_RETRY: > @@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task, > * The server thinks we tried to replay a request. > * Retry the call after bumping the sequence ID. > */ > + nfs4_slot_sequence_acked(slot, slot->seq_nr); > goto retry_new_seq; > case -NFS4ERR_BADSLOT: > /* > @@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task, > goto session_recover; > goto retry_nowait; > case -NFS4ERR_SEQ_MISORDERED: > + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); > /* > - * Was the last operation on this sequence interrupted? > - * If so, retry after bumping the sequence number. > + * Were one or more calls using this slot interrupted? > + * If the server never received the request, then our > + * transmitted slot sequence number may be too high. > */ > - if (interrupted) > - goto retry_new_seq; > - /* > - * Could this slot have been previously retired? > - * If so, then the server may be expecting seq_nr = 1! > - */ > - if (slot->seq_nr != 1) { > - slot->seq_nr = 1; > + if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { > + slot->seq_nr--; > goto retry_nowait; > } > - goto session_recover; > + /* > + * RFC5661: > + * A retry might be sent while the original request is > + * still in progress on the replier. The replier SHOULD > + * deal with the issue by returning NFS4ERR_DELAY as the > + * reply to SEQUENCE or CB_SEQUENCE operation, but > + * implementations MAY return NFS4ERR_SEQ_MISORDERED. > + * > + * Restart the search after a delay. > + */ > + slot->seq_nr = slot->seq_nr_highest_sent; > + goto out_retry; > default: > /* Just update the slot sequence no. */ > slot->seq_done = 1; > @@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { > .rpc_call_done = nfs41_call_sync_done, > }; > > -static void > -nfs4_sequence_process_interrupted(struct nfs_client *client, > - struct nfs4_slot *slot, const struct cred *cred) > -{ > - struct rpc_task *task; > - > - task = _nfs41_proc_sequence(client, cred, slot, true); > - if (!IS_ERR(task)) > - rpc_put_task_async(task); > -} > - > #else /* !CONFIG_NFS_V4_1 */ > > static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) > @@ -937,14 +943,6 @@ int nfs4_sequence_done(struct rpc_task *task, > } > EXPORT_SYMBOL_GPL(nfs4_sequence_done); > > -static void > -nfs4_sequence_process_interrupted(struct nfs_client *client, > - struct nfs4_slot *slot, const struct cred *cred) > -{ > - WARN_ON_ONCE(1); > - slot->interrupted = 0; > -} > - > #endif /* !CONFIG_NFS_V4_1 */ > > static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) > @@ -985,26 +983,19 @@ int nfs4_setup_sequence(struct nfs_client *client, > task->tk_timeout = 0; > } > > - for (;;) { > - spin_lock(&tbl->slot_tbl_lock); > - /* The state manager will wait until the slot table is empty */ > - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) > - goto out_sleep; > - > - slot = nfs4_alloc_slot(tbl); > - if (IS_ERR(slot)) { > - /* Try again in 1/4 second */ > - if (slot == ERR_PTR(-ENOMEM)) > - task->tk_timeout = HZ >> 2; > - goto out_sleep; > - } > - spin_unlock(&tbl->slot_tbl_lock); > + spin_lock(&tbl->slot_tbl_lock); > + /* The state manager will wait until the slot table is empty */ > + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) > + goto out_sleep; > > - if (likely(!slot->interrupted)) > - break; > - nfs4_sequence_process_interrupted(client, > - slot, task->tk_msg.rpc_cred); > + slot = nfs4_alloc_slot(tbl); > + if (IS_ERR(slot)) { > + /* Try again in 1/4 second */ > + if (slot == ERR_PTR(-ENOMEM)) > + task->tk_timeout = HZ >> 2; > + goto out_sleep; > } > + spin_unlock(&tbl->slot_tbl_lock); > > nfs4_sequence_attach_slot(args, res, slot); > > diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c > index a5489d70a724..39962c19744f 100644 > --- a/fs/nfs/nfs4session.c > +++ b/fs/nfs/nfs4session.c > @@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, > slot->table = tbl; > slot->slot_nr = slotid; > slot->seq_nr = seq_init; > + slot->seq_nr_highest_sent = seq_init; > + slot->seq_nr_last_acked = seq_init - 1; > } > return slot; > } > @@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, > p = &tbl->slots; > while (*p) { > (*p)->seq_nr = ivalue; > - (*p)->interrupted = 0; > + (*p)->seq_nr_highest_sent = ivalue; > + (*p)->seq_nr_last_acked = ivalue - 1; > p = &(*p)->next; > } > tbl->highest_used_slotid = NFS4_NO_SLOT; > diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h > index 3c550f297561..230509b77121 100644 > --- a/fs/nfs/nfs4session.h > +++ b/fs/nfs/nfs4session.h > @@ -23,8 +23,9 @@ struct nfs4_slot { > unsigned long generation; > u32 slot_nr; > u32 seq_nr; > - unsigned int interrupted : 1, > - privileged : 1, > + u32 seq_nr_last_acked; > + u32 seq_nr_highest_sent; > + unsigned int privileged : 1, > seq_done : 1; > }; > > -- > 2.20.1 > > > -- > kernel-team mailing list > kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team Acked-by: Sultan Alsawaf <sultan.alsawaf@canonical.com>
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0d4270867ddb..cd3bb830d8a1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -730,13 +730,25 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) res->sr_slot = NULL; } +static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, + u32 seqnr) +{ + if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) + slot->seq_nr_highest_sent = seqnr; +} +static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, + u32 seqnr) +{ + slot->seq_nr_highest_sent = seqnr; + slot->seq_nr_last_acked = seqnr; +} + static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { struct nfs4_session *session; struct nfs4_slot *slot = res->sr_slot; struct nfs_client *clp; - bool interrupted = false; int ret = 1; if (slot == NULL) @@ -747,16 +759,12 @@ static int nfs41_sequence_process(struct rpc_task *task, session = slot->table->session; - if (slot->interrupted) { - if (res->sr_status != -NFS4ERR_DELAY) - slot->interrupted = 0; - interrupted = true; - } - trace_nfs4_sequence_done(session, res); /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: + /* Mark this sequence number as having been acked */ + nfs4_slot_sequence_acked(slot, slot->seq_nr); /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; @@ -771,9 +779,9 @@ static int nfs41_sequence_process(struct rpc_task *task, * sr_status remains 1 if an RPC level error occurred. * The server may or may not have processed the sequence * operation.. - * Mark the slot as having hosted an interrupted RPC call. */ - slot->interrupted = 1; + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); + slot->seq_done = 1; goto out; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and @@ -784,6 +792,7 @@ static int nfs41_sequence_process(struct rpc_task *task, __func__, slot->slot_nr, slot->seq_nr); + nfs4_slot_sequence_acked(slot, slot->seq_nr); goto out_retry; case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_SEQ_FALSE_RETRY: @@ -791,6 +800,7 @@ static int nfs41_sequence_process(struct rpc_task *task, * The server thinks we tried to replay a request. * Retry the call after bumping the sequence ID. */ + nfs4_slot_sequence_acked(slot, slot->seq_nr); goto retry_new_seq; case -NFS4ERR_BADSLOT: /* @@ -801,21 +811,28 @@ static int nfs41_sequence_process(struct rpc_task *task, goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); /* - * Was the last operation on this sequence interrupted? - * If so, retry after bumping the sequence number. + * Were one or more calls using this slot interrupted? + * If the server never received the request, then our + * transmitted slot sequence number may be too high. */ - if (interrupted) - goto retry_new_seq; - /* - * Could this slot have been previously retired? - * If so, then the server may be expecting seq_nr = 1! - */ - if (slot->seq_nr != 1) { - slot->seq_nr = 1; + if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { + slot->seq_nr--; goto retry_nowait; } - goto session_recover; + /* + * RFC5661: + * A retry might be sent while the original request is + * still in progress on the replier. The replier SHOULD + * deal with the issue by returning NFS4ERR_DELAY as the + * reply to SEQUENCE or CB_SEQUENCE operation, but + * implementations MAY return NFS4ERR_SEQ_MISORDERED. + * + * Restart the search after a delay. + */ + slot->seq_nr = slot->seq_nr_highest_sent; + goto out_retry; default: /* Just update the slot sequence no. */ slot->seq_done = 1; @@ -906,17 +923,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; -static void -nfs4_sequence_process_interrupted(struct nfs_client *client, - struct nfs4_slot *slot, const struct cred *cred) -{ - struct rpc_task *task; - - task = _nfs41_proc_sequence(client, cred, slot, true); - if (!IS_ERR(task)) - rpc_put_task_async(task); -} - #else /* !CONFIG_NFS_V4_1 */ static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -937,14 +943,6 @@ int nfs4_sequence_done(struct rpc_task *task, } EXPORT_SYMBOL_GPL(nfs4_sequence_done); -static void -nfs4_sequence_process_interrupted(struct nfs_client *client, - struct nfs4_slot *slot, const struct cred *cred) -{ - WARN_ON_ONCE(1); - slot->interrupted = 0; -} - #endif /* !CONFIG_NFS_V4_1 */ static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) @@ -985,26 +983,19 @@ int nfs4_setup_sequence(struct nfs_client *client, task->tk_timeout = 0; } - for (;;) { - spin_lock(&tbl->slot_tbl_lock); - /* The state manager will wait until the slot table is empty */ - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; - - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* Try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - goto out_sleep; - } - spin_unlock(&tbl->slot_tbl_lock); + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; - if (likely(!slot->interrupted)) - break; - nfs4_sequence_process_interrupted(client, - slot, task->tk_msg.rpc_cred); + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; } + spin_unlock(&tbl->slot_tbl_lock); nfs4_sequence_attach_slot(args, res, slot); diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index a5489d70a724..39962c19744f 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -110,6 +110,8 @@ static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table *tbl, slot->table = tbl; slot->slot_nr = slotid; slot->seq_nr = seq_init; + slot->seq_nr_highest_sent = seq_init; + slot->seq_nr_last_acked = seq_init - 1; } return slot; } @@ -276,7 +278,8 @@ static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl, p = &tbl->slots; while (*p) { (*p)->seq_nr = ivalue; - (*p)->interrupted = 0; + (*p)->seq_nr_highest_sent = ivalue; + (*p)->seq_nr_last_acked = ivalue - 1; p = &(*p)->next; } tbl->highest_used_slotid = NFS4_NO_SLOT; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 3c550f297561..230509b77121 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -23,8 +23,9 @@ struct nfs4_slot { unsigned long generation; u32 slot_nr; u32 seq_nr; - unsigned int interrupted : 1, - privileged : 1, + u32 seq_nr_last_acked; + u32 seq_nr_highest_sent; + unsigned int privileged : 1, seq_done : 1; };