[04/11] libflash/mbox-flash: Allow mbox-flash to tell the driver msg timeouts

Message ID 20170629123925.28243-5-cyril.bur@au1.ibm.com
State Superseded
Headers show

Commit Message

Cyril Bur June 29, 2017, 12:39 p.m.
Currently when mbox-flash decides that a message times out the driver
has no way of knowing to drop the message and will continue waiting for
a response indefinitely preventing more messages from ever being sent.

This is a problem if the BMC crashes or has some other issue where it
won't ever respond to our outstanding message.

This patch provides a method for mbox-flash to tell the driver how long
it should wait before it no longer needs to care about the response.

Signed-off-by: Cyril Bur <cyril.bur@au1.ibm.com>
---
 hw/lpc-mbox.c         | 12 +++++++++---
 include/lpc-mbox.h    |  2 +-
 libflash/mbox-flash.c | 17 +++++++++--------
 3 files changed, 19 insertions(+), 12 deletions(-)

Comments

Samuel Mendoza-Jonas July 14, 2017, 4:03 a.m. | #1
On Thu, 2017-06-29 at 22:39 +1000, Cyril Bur wrote:
> Currently when mbox-flash decides that a message times out the driver
> has no way of knowing to drop the message and will continue waiting for
> a response indefinitely preventing more messages from ever being sent.
> 
> This is a problem if the BMC crashes or has some other issue where it
> won't ever respond to our outstanding message.
> 
> This patch provides a method for mbox-flash to tell the driver how long
> it should wait before it no longer needs to care about the response.
> 
> Signed-off-by: Cyril Bur <cyril.bur@au1.ibm.com>
> ---
>  hw/lpc-mbox.c         | 12 +++++++++---
>  include/lpc-mbox.h    |  2 +-
>  libflash/mbox-flash.c | 17 +++++++++--------
>  3 files changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/lpc-mbox.c b/hw/lpc-mbox.c
> index 74214938..f303f4d5 100644
> --- a/hw/lpc-mbox.c
> +++ b/hw/lpc-mbox.c
> @@ -64,6 +64,7 @@ struct mbox {
>  	struct lock lock; /* Protect in_flight */
>  	struct bmc_mbox_msg *in_flight;
>  	uint8_t sequence;
> +	unsigned long timeout;
>  };
>  
>  static struct mbox mbox;
> @@ -115,7 +116,7 @@ static void bmc_mbox_send_message(struct bmc_mbox_msg *msg)
>  	bmc_mbox_outb(MBOX_CTRL_INT_SEND, MBOX_HOST_CTRL);
>  }
>  
> -int bmc_mbox_enqueue(struct bmc_mbox_msg *msg)
> +int bmc_mbox_enqueue(struct bmc_mbox_msg *msg, unsigned int timeout_sec)

Is there a benefit to specifying timeout_sec on every call to
bmc_mbox_enqueue / msg_send, vs setting it in eg. the mbox struct?

>  {
>  	if (!mbox.base) {
>  		prlog(PR_CRIT, "Using MBOX without init!\n");
> @@ -125,10 +126,15 @@ int bmc_mbox_enqueue(struct bmc_mbox_msg *msg)
>  	lock(&mbox.lock);
>  	if (mbox.in_flight) {
>  		prlog(PR_DEBUG, "MBOX message already in flight\n");
> -		unlock(&mbox.lock);
> -		return OPAL_BUSY;
> +		if (mftb() > mbox.timeout) {
> +			prlog(PR_ERR, "In flight message dropped on the floor\n");
> +		} else {
> +			unlock(&mbox.lock);
> +			return OPAL_BUSY;
> +		}
>  	}
>  
> +	mbox.timeout = mftb() + secs_to_tb(timeout_sec);
>  	mbox.in_flight = msg;
>  	unlock(&mbox.lock);
>  	msg->seq = ++mbox.sequence;
> diff --git a/include/lpc-mbox.h b/include/lpc-mbox.h
> index c4b1015b..569f1f72 100644
> --- a/include/lpc-mbox.h
> +++ b/include/lpc-mbox.h
> @@ -63,7 +63,7 @@ struct bmc_mbox_msg {
>  	uint8_t bmc;
>  };
>  
> -int bmc_mbox_enqueue(struct bmc_mbox_msg *msg);
> +int bmc_mbox_enqueue(struct bmc_mbox_msg *msg, unsigned int timeout_sec);
>  int bmc_mbox_register_callback(void (*callback)(struct bmc_mbox_msg *msg, void *priv),
>  		void *drv_data);
>  int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void *priv),
> diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
> index 90e6e860..2914901e 100644
> --- a/libflash/mbox-flash.c
> +++ b/libflash/mbox-flash.c
> @@ -252,13 +252,14 @@ static bool is_reboot(struct mbox_flash_data *mbox_flash)
>  	return mbox_flash->reboot;
>  }
>  
> -static int msg_send(struct mbox_flash_data *mbox_flash, struct bmc_mbox_msg *msg)
> +static int msg_send(struct mbox_flash_data *mbox_flash, struct bmc_mbox_msg *msg,
> +		unsigned int timeout_sec)
>  {
>  	if (is_reboot(mbox_flash))
>  		return FLASH_ERR_AGAIN;
>  	mbox_flash->busy = true;
>  	mbox_flash->rc = 0;
> -	return bmc_mbox_enqueue(msg);
> +	return bmc_mbox_enqueue(msg, timeout_sec);
>  }
>  
>  static int wait_for_bmc(struct mbox_flash_data *mbox_flash, unsigned int timeout_sec)
> @@ -307,7 +308,7 @@ static int mbox_flash_ack(struct mbox_flash_data *mbox_flash, uint8_t reg)
>  	/* Clear this first so msg_send() doesn't freak out */
>  	mbox_flash->reboot = false;
>  
> -	rc = msg_send(mbox_flash, msg);
> +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
>  
>  	/* Still need to deal with it, we've only acked it now. */
>  	mbox_flash->reboot = true;
> @@ -499,7 +500,7 @@ static int mbox_flash_mark_write(struct mbox_flash_data *mbox_flash,
>  		msg_put_u16(msg, 2, end - start); /* Total Length */
>  	}
>  
> -	rc = msg_send(mbox_flash, msg);
> +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
>  		goto out;
> @@ -554,7 +555,7 @@ static int mbox_flash_flush(struct mbox_flash_data *mbox_flash)
>  	if (!msg)
>  		return FLASH_ERR_MALLOC_FAILED;
>  
> -	rc = msg_send(mbox_flash, msg);
> +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
>  		goto out;
> @@ -609,7 +610,7 @@ static int mbox_window_move(struct mbox_flash_data *mbox_flash,
>  		return FLASH_ERR_MALLOC_FAILED;
>  
>  	msg_put_u16(msg, 0, bytes_to_blocks(mbox_flash, pos));
> -	rc = msg_send(mbox_flash, msg);
> +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
>  		goto out;
> @@ -772,7 +773,7 @@ static int mbox_flash_get_info(struct blocklevel_device *bl, const char **name,
>  		*name = NULL;
>  
>  	mbox_flash->busy = true;
> -	rc = msg_send(mbox_flash, msg);
> +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
>  		goto out;
> @@ -965,7 +966,7 @@ static int protocol_init(struct mbox_flash_data *mbox_flash)
>  		return FLASH_ERR_MALLOC_FAILED;
>  
>  	msg_put_u8(msg, 0, mbox_flash->version);
> -	rc = msg_send(mbox_flash, msg);
> +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
>  		goto out;
Cyril Bur July 18, 2017, 1:21 a.m. | #2
On Fri, 2017-07-14 at 14:03 +1000, Samuel Mendoza-Jonas wrote:
> On Thu, 2017-06-29 at 22:39 +1000, Cyril Bur wrote:
> > Currently when mbox-flash decides that a message times out the driver
> > has no way of knowing to drop the message and will continue waiting for
> > a response indefinitely preventing more messages from ever being sent.
> > 
> > This is a problem if the BMC crashes or has some other issue where it
> > won't ever respond to our outstanding message.
> > 
> > This patch provides a method for mbox-flash to tell the driver how long
> > it should wait before it no longer needs to care about the response.
> > 
> > Signed-off-by: Cyril Bur <cyril.bur@au1.ibm.com>
> > ---
> >  hw/lpc-mbox.c         | 12 +++++++++---
> >  include/lpc-mbox.h    |  2 +-
> >  libflash/mbox-flash.c | 17 +++++++++--------
> >  3 files changed, 19 insertions(+), 12 deletions(-)
> > 
> > diff --git a/hw/lpc-mbox.c b/hw/lpc-mbox.c
> > index 74214938..f303f4d5 100644
> > --- a/hw/lpc-mbox.c
> > +++ b/hw/lpc-mbox.c
> > @@ -64,6 +64,7 @@ struct mbox {
> >  	struct lock lock; /* Protect in_flight */
> >  	struct bmc_mbox_msg *in_flight;
> >  	uint8_t sequence;
> > +	unsigned long timeout;
> >  };
> >  
> >  static struct mbox mbox;
> > @@ -115,7 +116,7 @@ static void bmc_mbox_send_message(struct bmc_mbox_msg *msg)
> >  	bmc_mbox_outb(MBOX_CTRL_INT_SEND, MBOX_HOST_CTRL);
> >  }
> >  
> > -int bmc_mbox_enqueue(struct bmc_mbox_msg *msg)
> > +int bmc_mbox_enqueue(struct bmc_mbox_msg *msg, unsigned int timeout_sec)
> 
> Is there a benefit to specifying timeout_sec on every call to
> bmc_mbox_enqueue / msg_send, vs setting it in eg. the mbox struct?
> 

As you notice in a later patch there is. However, when I did this in
this patch I thought it would be awesome and we could fine tune the
timeouts per different types of calls, its going to be amazing and so
useful.

After so many times of typing the same value of that parameter I'm
totally open to this being superfluous.

> >  {
> >  	if (!mbox.base) {
> >  		prlog(PR_CRIT, "Using MBOX without init!\n");
> > @@ -125,10 +126,15 @@ int bmc_mbox_enqueue(struct bmc_mbox_msg *msg)
> >  	lock(&mbox.lock);
> >  	if (mbox.in_flight) {
> >  		prlog(PR_DEBUG, "MBOX message already in flight\n");
> > -		unlock(&mbox.lock);
> > -		return OPAL_BUSY;
> > +		if (mftb() > mbox.timeout) {
> > +			prlog(PR_ERR, "In flight message dropped on the floor\n");
> > +		} else {
> > +			unlock(&mbox.lock);
> > +			return OPAL_BUSY;
> > +		}
> >  	}
> >  
> > +	mbox.timeout = mftb() + secs_to_tb(timeout_sec);
> >  	mbox.in_flight = msg;
> >  	unlock(&mbox.lock);
> >  	msg->seq = ++mbox.sequence;
> > diff --git a/include/lpc-mbox.h b/include/lpc-mbox.h
> > index c4b1015b..569f1f72 100644
> > --- a/include/lpc-mbox.h
> > +++ b/include/lpc-mbox.h
> > @@ -63,7 +63,7 @@ struct bmc_mbox_msg {
> >  	uint8_t bmc;
> >  };
> >  
> > -int bmc_mbox_enqueue(struct bmc_mbox_msg *msg);
> > +int bmc_mbox_enqueue(struct bmc_mbox_msg *msg, unsigned int timeout_sec);
> >  int bmc_mbox_register_callback(void (*callback)(struct bmc_mbox_msg *msg, void *priv),
> >  		void *drv_data);
> >  int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void *priv),
> > diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
> > index 90e6e860..2914901e 100644
> > --- a/libflash/mbox-flash.c
> > +++ b/libflash/mbox-flash.c
> > @@ -252,13 +252,14 @@ static bool is_reboot(struct mbox_flash_data *mbox_flash)
> >  	return mbox_flash->reboot;
> >  }
> >  
> > -static int msg_send(struct mbox_flash_data *mbox_flash, struct bmc_mbox_msg *msg)
> > +static int msg_send(struct mbox_flash_data *mbox_flash, struct bmc_mbox_msg *msg,
> > +		unsigned int timeout_sec)
> >  {
> >  	if (is_reboot(mbox_flash))
> >  		return FLASH_ERR_AGAIN;
> >  	mbox_flash->busy = true;
> >  	mbox_flash->rc = 0;
> > -	return bmc_mbox_enqueue(msg);
> > +	return bmc_mbox_enqueue(msg, timeout_sec);
> >  }
> >  
> >  static int wait_for_bmc(struct mbox_flash_data *mbox_flash, unsigned int timeout_sec)
> > @@ -307,7 +308,7 @@ static int mbox_flash_ack(struct mbox_flash_data *mbox_flash, uint8_t reg)
> >  	/* Clear this first so msg_send() doesn't freak out */
> >  	mbox_flash->reboot = false;
> >  
> > -	rc = msg_send(mbox_flash, msg);
> > +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
> >  
> >  	/* Still need to deal with it, we've only acked it now. */
> >  	mbox_flash->reboot = true;
> > @@ -499,7 +500,7 @@ static int mbox_flash_mark_write(struct mbox_flash_data *mbox_flash,
> >  		msg_put_u16(msg, 2, end - start); /* Total Length */
> >  	}
> >  
> > -	rc = msg_send(mbox_flash, msg);
> > +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
> >  	if (rc) {
> >  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
> >  		goto out;
> > @@ -554,7 +555,7 @@ static int mbox_flash_flush(struct mbox_flash_data *mbox_flash)
> >  	if (!msg)
> >  		return FLASH_ERR_MALLOC_FAILED;
> >  
> > -	rc = msg_send(mbox_flash, msg);
> > +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
> >  	if (rc) {
> >  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
> >  		goto out;
> > @@ -609,7 +610,7 @@ static int mbox_window_move(struct mbox_flash_data *mbox_flash,
> >  		return FLASH_ERR_MALLOC_FAILED;
> >  
> >  	msg_put_u16(msg, 0, bytes_to_blocks(mbox_flash, pos));
> > -	rc = msg_send(mbox_flash, msg);
> > +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
> >  	if (rc) {
> >  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
> >  		goto out;
> > @@ -772,7 +773,7 @@ static int mbox_flash_get_info(struct blocklevel_device *bl, const char **name,
> >  		*name = NULL;
> >  
> >  	mbox_flash->busy = true;
> > -	rc = msg_send(mbox_flash, msg);
> > +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
> >  	if (rc) {
> >  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
> >  		goto out;
> > @@ -965,7 +966,7 @@ static int protocol_init(struct mbox_flash_data *mbox_flash)
> >  		return FLASH_ERR_MALLOC_FAILED;
> >  
> >  	msg_put_u8(msg, 0, mbox_flash->version);
> > -	rc = msg_send(mbox_flash, msg);
> > +	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
> >  	if (rc) {
> >  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
> >  		goto out;
> 
>

Patch

diff --git a/hw/lpc-mbox.c b/hw/lpc-mbox.c
index 74214938..f303f4d5 100644
--- a/hw/lpc-mbox.c
+++ b/hw/lpc-mbox.c
@@ -64,6 +64,7 @@  struct mbox {
 	struct lock lock; /* Protect in_flight */
 	struct bmc_mbox_msg *in_flight;
 	uint8_t sequence;
+	unsigned long timeout;
 };
 
 static struct mbox mbox;
@@ -115,7 +116,7 @@  static void bmc_mbox_send_message(struct bmc_mbox_msg *msg)
 	bmc_mbox_outb(MBOX_CTRL_INT_SEND, MBOX_HOST_CTRL);
 }
 
-int bmc_mbox_enqueue(struct bmc_mbox_msg *msg)
+int bmc_mbox_enqueue(struct bmc_mbox_msg *msg, unsigned int timeout_sec)
 {
 	if (!mbox.base) {
 		prlog(PR_CRIT, "Using MBOX without init!\n");
@@ -125,10 +126,15 @@  int bmc_mbox_enqueue(struct bmc_mbox_msg *msg)
 	lock(&mbox.lock);
 	if (mbox.in_flight) {
 		prlog(PR_DEBUG, "MBOX message already in flight\n");
-		unlock(&mbox.lock);
-		return OPAL_BUSY;
+		if (mftb() > mbox.timeout) {
+			prlog(PR_ERR, "In flight message dropped on the floor\n");
+		} else {
+			unlock(&mbox.lock);
+			return OPAL_BUSY;
+		}
 	}
 
+	mbox.timeout = mftb() + secs_to_tb(timeout_sec);
 	mbox.in_flight = msg;
 	unlock(&mbox.lock);
 	msg->seq = ++mbox.sequence;
diff --git a/include/lpc-mbox.h b/include/lpc-mbox.h
index c4b1015b..569f1f72 100644
--- a/include/lpc-mbox.h
+++ b/include/lpc-mbox.h
@@ -63,7 +63,7 @@  struct bmc_mbox_msg {
 	uint8_t bmc;
 };
 
-int bmc_mbox_enqueue(struct bmc_mbox_msg *msg);
+int bmc_mbox_enqueue(struct bmc_mbox_msg *msg, unsigned int timeout_sec);
 int bmc_mbox_register_callback(void (*callback)(struct bmc_mbox_msg *msg, void *priv),
 		void *drv_data);
 int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void *priv),
diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
index 90e6e860..2914901e 100644
--- a/libflash/mbox-flash.c
+++ b/libflash/mbox-flash.c
@@ -252,13 +252,14 @@  static bool is_reboot(struct mbox_flash_data *mbox_flash)
 	return mbox_flash->reboot;
 }
 
-static int msg_send(struct mbox_flash_data *mbox_flash, struct bmc_mbox_msg *msg)
+static int msg_send(struct mbox_flash_data *mbox_flash, struct bmc_mbox_msg *msg,
+		unsigned int timeout_sec)
 {
 	if (is_reboot(mbox_flash))
 		return FLASH_ERR_AGAIN;
 	mbox_flash->busy = true;
 	mbox_flash->rc = 0;
-	return bmc_mbox_enqueue(msg);
+	return bmc_mbox_enqueue(msg, timeout_sec);
 }
 
 static int wait_for_bmc(struct mbox_flash_data *mbox_flash, unsigned int timeout_sec)
@@ -307,7 +308,7 @@  static int mbox_flash_ack(struct mbox_flash_data *mbox_flash, uint8_t reg)
 	/* Clear this first so msg_send() doesn't freak out */
 	mbox_flash->reboot = false;
 
-	rc = msg_send(mbox_flash, msg);
+	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
 
 	/* Still need to deal with it, we've only acked it now. */
 	mbox_flash->reboot = true;
@@ -499,7 +500,7 @@  static int mbox_flash_mark_write(struct mbox_flash_data *mbox_flash,
 		msg_put_u16(msg, 2, end - start); /* Total Length */
 	}
 
-	rc = msg_send(mbox_flash, msg);
+	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
 	if (rc) {
 		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
 		goto out;
@@ -554,7 +555,7 @@  static int mbox_flash_flush(struct mbox_flash_data *mbox_flash)
 	if (!msg)
 		return FLASH_ERR_MALLOC_FAILED;
 
-	rc = msg_send(mbox_flash, msg);
+	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
 	if (rc) {
 		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
 		goto out;
@@ -609,7 +610,7 @@  static int mbox_window_move(struct mbox_flash_data *mbox_flash,
 		return FLASH_ERR_MALLOC_FAILED;
 
 	msg_put_u16(msg, 0, bytes_to_blocks(mbox_flash, pos));
-	rc = msg_send(mbox_flash, msg);
+	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
 	if (rc) {
 		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
 		goto out;
@@ -772,7 +773,7 @@  static int mbox_flash_get_info(struct blocklevel_device *bl, const char **name,
 		*name = NULL;
 
 	mbox_flash->busy = true;
-	rc = msg_send(mbox_flash, msg);
+	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
 	if (rc) {
 		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
 		goto out;
@@ -965,7 +966,7 @@  static int protocol_init(struct mbox_flash_data *mbox_flash)
 		return FLASH_ERR_MALLOC_FAILED;
 
 	msg_put_u8(msg, 0, mbox_flash->version);
-	rc = msg_send(mbox_flash, msg);
+	rc = msg_send(mbox_flash, msg, MBOX_DEFAULT_TIMEOUT);
 	if (rc) {
 		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX message\n");
 		goto out;