diff mbox series

[net] rxrpc: Fix life check

Message ID 154206200235.19188.1775391172980714293.stgit@warthog.procyon.org.uk
State Accepted, archived
Delegated to: David Miller
Headers show
Series [net] rxrpc: Fix life check | expand

Commit Message

David Howells Nov. 12, 2018, 10:33 p.m. UTC
The life-checking function, which is used by kAFS to make sure that a call
is still live in the event of a pending signal, only samples the received
packet serial number counter; it doesn't actually provoke a change in the
counter, rather relying on the server to happen to give us a packet in the
time window.

Fix this by adding a function to force a ping to be transmitted.

kAFS then keeps track of whether there's been a stall, and if so, uses the
new function to ping the server, resetting the timeout to allow the reply
to come back.

If there's a stall, a ping and the call is *still* stalled in the same
place after another period, then the call will be aborted.

Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
Fixes: f4d15fb6f99a ("rxrpc: Provide functions for allowing cleaner handling of signals")
Signed-off-by: David Howells <dhowells@redhat.com>
---

 Documentation/networking/rxrpc.txt |   17 +++++++++++------
 fs/afs/rxrpc.c                     |   11 ++++++++++-
 include/net/af_rxrpc.h             |    3 ++-
 include/trace/events/rxrpc.h       |    2 ++
 net/rxrpc/af_rxrpc.c               |   27 +++++++++++++++++++++++----
 5 files changed, 48 insertions(+), 12 deletions(-)

Comments

David Miller Nov. 15, 2018, 7:36 p.m. UTC | #1
From: David Howells <dhowells@redhat.com>
Date: Mon, 12 Nov 2018 22:33:22 +0000

> The life-checking function, which is used by kAFS to make sure that a call
> is still live in the event of a pending signal, only samples the received
> packet serial number counter; it doesn't actually provoke a change in the
> counter, rather relying on the server to happen to give us a packet in the
> time window.
> 
> Fix this by adding a function to force a ping to be transmitted.
> 
> kAFS then keeps track of whether there's been a stall, and if so, uses the
> new function to ping the server, resetting the timeout to allow the reply
> to come back.
> 
> If there's a stall, a ping and the call is *still* stalled in the same
> place after another period, then the call will be aborted.
> 
> Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
> Fixes: f4d15fb6f99a ("rxrpc: Provide functions for allowing cleaner handling of signals")
> Signed-off-by: David Howells <dhowells@redhat.com>

Applied, thank you.
diff mbox series

Patch

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 605e00cdd6be..89f1302d593a 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -1056,18 +1056,23 @@  The kernel interface functions are as follows:
 
 	u32 rxrpc_kernel_check_life(struct socket *sock,
 				    struct rxrpc_call *call);
+	void rxrpc_kernel_probe_life(struct socket *sock,
+				     struct rxrpc_call *call);
 
-     This returns a number that is updated when ACKs are received from the peer
-     (notably including PING RESPONSE ACKs which we can elicit by sending PING
-     ACKs to see if the call still exists on the server).  The caller should
-     compare the numbers of two calls to see if the call is still alive after
-     waiting for a suitable interval.
+     The first function returns a number that is updated when ACKs are received
+     from the peer (notably including PING RESPONSE ACKs which we can elicit by
+     sending PING ACKs to see if the call still exists on the server).  The
+     caller should compare the numbers of two calls to see if the call is still
+     alive after waiting for a suitable interval.
 
      This allows the caller to work out if the server is still contactable and
      if the call is still alive on the server whilst waiting for the server to
      process a client operation.
 
-     This function may transmit a PING ACK.
+     The second function causes a ping ACK to be transmitted to try to provoke
+     the peer into responding, which would then cause the value returned by the
+     first function to change.  Note that this must be called in TASK_RUNNING
+     state.
 
  (*) Get reply timestamp.
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 59970886690f..a7b44863d502 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -576,6 +576,7 @@  static long afs_wait_for_call_to_complete(struct afs_call *call,
 {
 	signed long rtt2, timeout;
 	long ret;
+	bool stalled = false;
 	u64 rtt;
 	u32 life, last_life;
 
@@ -609,12 +610,20 @@  static long afs_wait_for_call_to_complete(struct afs_call *call,
 
 		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
 		if (timeout == 0 &&
-		    life == last_life && signal_pending(current))
+		    life == last_life && signal_pending(current)) {
+			if (stalled)
 				break;
+			__set_current_state(TASK_RUNNING);
+			rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
+			timeout = rtt2;
+			stalled = true;
+			continue;
+		}
 
 		if (life != last_life) {
 			timeout = rtt2;
 			last_life = life;
+			stalled = false;
 		}
 
 		timeout = schedule_timeout(timeout);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index de587948042a..1adefe42c0a6 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -77,7 +77,8 @@  int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
 			    struct sockaddr_rxrpc *, struct key *);
 int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
 			    enum rxrpc_call_completion *, u32 *);
-u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
+u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
+void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
 u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
 bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
 				 ktime_t *);
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 573d5b901fb1..5b50fe4906d2 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -181,6 +181,7 @@  enum rxrpc_timer_trace {
 enum rxrpc_propose_ack_trace {
 	rxrpc_propose_ack_client_tx_end,
 	rxrpc_propose_ack_input_data,
+	rxrpc_propose_ack_ping_for_check_life,
 	rxrpc_propose_ack_ping_for_keepalive,
 	rxrpc_propose_ack_ping_for_lost_ack,
 	rxrpc_propose_ack_ping_for_lost_reply,
@@ -380,6 +381,7 @@  enum rxrpc_tx_point {
 #define rxrpc_propose_ack_traces \
 	EM(rxrpc_propose_ack_client_tx_end,	"ClTxEnd") \
 	EM(rxrpc_propose_ack_input_data,	"DataIn ") \
+	EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \
 	EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
 	EM(rxrpc_propose_ack_ping_for_lost_ack,	"LostAck") \
 	EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 64362d078da8..a2522f9d71e2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -375,16 +375,35 @@  EXPORT_SYMBOL(rxrpc_kernel_end_call);
  * getting ACKs from the server.  Returns a number representing the life state
  * which can be compared to that returned by a previous call.
  *
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called and
+ * then 2RTT waited.
  */
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_check_life(const struct socket *sock,
+			    const struct rxrpc_call *call)
 {
 	return call->acks_latest;
 }
 EXPORT_SYMBOL(rxrpc_kernel_check_life);
 
+/**
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it.  This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
+ */
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
+{
+	rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+			  rxrpc_propose_ack_ping_for_check_life);
+	rxrpc_send_ack_packet(call, true, NULL);
+}
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
+
 /**
  * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
  * @sock: The socket the call is on