diff mbox

[09/16] tty: serial: 8250_dma: Add a TX trigger workaround for AM33xx

Message ID 1410377411-26656-10-git-send-email-bigeasy@linutronix.de
State New
Headers show

Commit Message

Sebastian Andrzej Siewior Sept. 10, 2014, 7:30 p.m. UTC
At least on AM335x the following problem exists: Even if the TX FIFO is
empty and a TX transfer is programmed (and started) the UART does not
trigger the DMA transfer.
After $TRESHOLD number of bytes have been written to the FIFO manually the
UART reevaluates the whole situation and decides that now there is enough
room in the FIFO and so the transfer begins.
This problem has not been seen on DRA7 or beagle board xm (OMAP3). I am not
sure if this is UART-IP core specific or DMA engine.

The workaround is to use a threshold of one byte, program the DMA
transfer minus one byte and then to put the first byte into the FIFO to
kick start the transfer.

v7…v8:
	- fix the problem when get invoked and the FIFO is full.

Reviewed-by: Tony Lindgren <tony@atomide.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/tty/serial/8250/8250.h     |  3 +++
 drivers/tty/serial/8250/8250_dma.c | 39 +++++++++++++++++++++++++++++++++++---
 include/uapi/linux/serial_reg.h    |  1 +
 3 files changed, 40 insertions(+), 3 deletions(-)

Comments

Heikki Krogerus Sept. 11, 2014, 11:17 a.m. UTC | #1
On Wed, Sep 10, 2014 at 09:30:04PM +0200, Sebastian Andrzej Siewior wrote:
> At least on AM335x the following problem exists: Even if the TX FIFO is
> empty and a TX transfer is programmed (and started) the UART does not
> trigger the DMA transfer.
> After $TRESHOLD number of bytes have been written to the FIFO manually the
> UART reevaluates the whole situation and decides that now there is enough
> room in the FIFO and so the transfer begins.
> This problem has not been seen on DRA7 or beagle board xm (OMAP3). I am not
> sure if this is UART-IP core specific or DMA engine.
> 
> The workaround is to use a threshold of one byte, program the DMA
> transfer minus one byte and then to put the first byte into the FIFO to
> kick start the transfer.
> 
> v7…v8:
> 	- fix the problem when get invoked and the FIFO is full.
> 
> Reviewed-by: Tony Lindgren <tony@atomide.com>
> Tested-by: Tony Lindgren <tony@atomide.com>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  drivers/tty/serial/8250/8250.h     |  3 +++
>  drivers/tty/serial/8250/8250_dma.c | 39 +++++++++++++++++++++++++++++++++++---
>  include/uapi/linux/serial_reg.h    |  1 +
>  3 files changed, 40 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
> index fbed1636e9c4..09489b391568 100644
> --- a/drivers/tty/serial/8250/8250.h
> +++ b/drivers/tty/serial/8250/8250.h
> @@ -82,6 +82,9 @@ struct serial8250_config {
>  #define UART_BUG_PARITY	(1 << 4)	/* UART mishandles parity if FIFO enabled */
>  #define UART_BUG_DMA_RX	(1 << 5)	/* UART needs DMA RX req before there is
>  					   data in FIFO */
> +#define UART_BUG_DMA_TX	(1 << 6)	/* UART needs one byte in FIFO for
> +					   kickstart */

I don't think we should go ahead with this patch. I'm pretty sure
this is AM335 specific problem, or at least limited to only few
platforms. And I don't think we should take any more "BUG" flags.

We should add hooks like tx_dma and rx_dma to struct uart_8250_dma so
that the probe drivers can replace serial8250_tx_dma and
seria8250_rx_dma, like I think Alan already suggested.

Let's keep serial8250_tx_dma/rx_dma as the default, and not add any
quirks to them. Only if there is a very common case should it be
handled in those. The case of RX req needing to be sent before data in
FIFO maybe one of those, but I'm no sure.


>  #define PROBE_RSA	(1 << 0)
>  #define PROBE_ANY	(~0)
>  
> diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
> index 3674900a1f14..48dc57aad0dd 100644
> --- a/drivers/tty/serial/8250/8250_dma.c
> +++ b/drivers/tty/serial/8250/8250_dma.c
> @@ -83,6 +83,7 @@ int serial8250_tx_dma(struct uart_8250_port *p)
>  	struct uart_8250_dma		*dma = p->dma;
>  	struct circ_buf			*xmit = &p->port.state->xmit;
>  	struct dma_async_tx_descriptor	*desc;
> +	unsigned int			skip_byte = 0;
>  	int ret;
>  
>  	if (uart_tx_stopped(&p->port) || dma->tx_running ||
> @@ -91,10 +92,40 @@ int serial8250_tx_dma(struct uart_8250_port *p)
>  
>  	dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
>  
> +	if (p->bugs & UART_BUG_DMA_TX) {
> +		u8 tx_lvl;
> +
> +		/*
> +		 * We need to put the first byte into the FIFO in order to start
> +		 * the DMA transfer. For transfers smaller than four bytes we
> +		 * don't bother doing DMA at all. It seem not matter if there
> +		 * are still bytes in the FIFO from the last transfer (in case
> +		 * we got here directly from __dma_tx_complete()). Bytes leaving
> +		 * the FIFO seem not to trigger the DMA transfer. It is really
> +		 * the byte that we put into the FIFO.
> +		 * If the FIFO is already full then we most likely got here from
> +		 * __dma_tx_complete(). And this means the DMA engine just
> +		 * completed its work. We don't have to wait the complete 86us
> +		 * at 115200,8n1 but around 60us (not to mention lower
> +		 * baudrates). So in that case we take the interrupt and try
> +		 * again with an empty FIFO.
> +		 */
> +		tx_lvl = serial_in(p, UART_OMAP_TX_LVL);
> +		if (tx_lvl == p->tx_loadsz) {
> +			ret = -EBUSY;
> +			goto err;
> +		}
> +		if (dma->tx_size < 4) {
> +			ret = -EINVAL;
> +			goto err;
> +		}
> +		skip_byte = 1;
> +	}
> +
>  	desc = dmaengine_prep_slave_single(dma->txchan,
> -					   dma->tx_addr + xmit->tail,
> -					   dma->tx_size, DMA_MEM_TO_DEV,
> -					   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +			dma->tx_addr + xmit->tail + skip_byte,
> +			dma->tx_size - skip_byte, DMA_MEM_TO_DEV,
> +			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
>  	if (!desc) {
>  		ret = -EBUSY;
>  		goto err;
> @@ -118,6 +149,8 @@ int serial8250_tx_dma(struct uart_8250_port *p)
>  			serial_out(p, UART_IER, p->ier);
>  		}
>  	}
> +	if (skip_byte)
> +		serial_out(p, UART_TX, xmit->buf[xmit->tail]);
>  	return 0;
>  err:
>  	dma->tx_err = 1;
> diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
> index df6c9ab6b0cd..53af3b790129 100644
> --- a/include/uapi/linux/serial_reg.h
> +++ b/include/uapi/linux/serial_reg.h
> @@ -359,6 +359,7 @@
>  #define UART_OMAP_SYSC		0x15	/* System configuration register */
>  #define UART_OMAP_SYSS		0x16	/* System status register */
>  #define UART_OMAP_WER		0x17	/* Wake-up enable register */
> +#define UART_OMAP_TX_LVL	0x1a	/* TX FIFO level register */
>  
>  /*
>   * These are the definitions for the MDR1 register
> -- 
> 2.1.0

Cheers,
Sebastian Andrzej Siewior Sept. 11, 2014, 11:42 a.m. UTC | #2
On 09/11/2014 01:17 PM, Heikki Krogerus wrote:
>> diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
>> index fbed1636e9c4..09489b391568 100644
>> --- a/drivers/tty/serial/8250/8250.h
>> +++ b/drivers/tty/serial/8250/8250.h
>> @@ -82,6 +82,9 @@ struct serial8250_config {
>>  #define UART_BUG_PARITY	(1 << 4)	/* UART mishandles parity if FIFO enabled */
>>  #define UART_BUG_DMA_RX	(1 << 5)	/* UART needs DMA RX req before there is
>>  					   data in FIFO */
>> +#define UART_BUG_DMA_TX	(1 << 6)	/* UART needs one byte in FIFO for
>> +					   kickstart */
> 
> I don't think we should go ahead with this patch. I'm pretty sure
> this is AM335 specific problem, or at least limited to only few
> platforms. And I don't think we should take any more "BUG" flags.
> 
> We should add hooks like tx_dma and rx_dma to struct uart_8250_dma so
> that the probe drivers can replace serial8250_tx_dma and
> seria8250_rx_dma, like I think Alan already suggested.

Okay. Wasn't aware that Alan already suggested that.
I also need a watchdog timer for TX since it seems that on omap3 the
DMA engine suddenly forgets to continue with DMA…

If this is really what we want, I would need to refactor a few things…

> Let's keep serial8250_tx_dma/rx_dma as the default, and not add any
> quirks to them. Only if there is a very common case should it be
> handled in those. The case of RX req needing to be sent before data in
> FIFO maybe one of those, but I'm no sure.

keep in mind that both (RX & TX bugs/hacks) need also a bit of handling
in the 8250-core so it works together (like the tx_err member so we
fall back to manual xmit)

Sebastian
Peter Hurley Sept. 11, 2014, 12:32 p.m. UTC | #3
On 09/11/2014 07:42 AM, Sebastian Andrzej Siewior wrote:
> I also need a watchdog timer for TX since it seems that on omap3 the
> DMA engine suddenly forgets to continue with DMA…

One difference I noticed between the omap driver and the 8250 driver is
the way modem status interrupts are handled.

The omap driver only checks modem status for the UART_IIR_MSI interrupt type.
The 8250 driver checks modem status at every interrupt (other than NO_INT).

I think the UART_MSR_DCTS bit always reflects that the CTS input has changed
between reads of the MSR _even if auto CTS is on_. So perhaps the hardware
is being stopped by uart_handle_cts_change() when auto CTS is on?

Regards,
Peter Hurley

[The UPF_HARD_FLOW thing was pretty much just done for omap even though
8250 already had auto CTS/auto RTS. Serial core hardware flow control support
needs a redo as drivers have pretty much tacked stuff on randomly.]
Sebastian Andrzej Siewior Sept. 11, 2014, 12:50 p.m. UTC | #4
On 09/11/2014 02:32 PM, Peter Hurley wrote:
> On 09/11/2014 07:42 AM, Sebastian Andrzej Siewior wrote:
>> I also need a watchdog timer for TX since it seems that on omap3 the
>> DMA engine suddenly forgets to continue with DMA…
> 
> One difference I noticed between the omap driver and the 8250 driver is
> the way modem status interrupts are handled.
> 
> The omap driver only checks modem status for the UART_IIR_MSI interrupt type.
> The 8250 driver checks modem status at every interrupt (other than NO_INT).
> 
> I think the UART_MSR_DCTS bit always reflects that the CTS input has changed
> between reads of the MSR _even if auto CTS is on_. So perhaps the hardware
> is being stopped by uart_handle_cts_change() when auto CTS is on?

I doubt that. What I see from a timer debug is that the TX-FIFO level
is at 0, the DMA transfer for say 1024 bytes start.
The FIFO is filled to 64bytes and refilled so it doesn't drop below 50.
At the time of the stall I see that the DMA engine has outstanding
bytes which it should transfer and the TX FIFO is empty. If hardware
flow control stops the transfer, I would expect that the DMA engine
still fills the TX-FIFO until 64 and then waits. But it doesn't.
Writing bytes into the FIFO leads to bytes beeing sent (and I see them
on the other side) but the DMA transfer is still on hold. Canceling the
DMA transfer and re-programming the remaining bytes transfers the
remaining bytes.

The odd thing is that I only triggered it with "less file". It doesn't
happen on regular console interaction or "cat large-file". And it only
triggers on beagle board xm (omap34xx) and I wasn't able to reproduce
it on am335x or dra7. The latter shares the same DMA engine as beagle
board xm.

I remember also that I disabled the HW/SW float control just to make
sure it is not it.

> 
> Regards,
> Peter Hurley
> 
> [The UPF_HARD_FLOW thing was pretty much just done for omap even though
> 8250 already had auto CTS/auto RTS. Serial core hardware flow control support
> needs a redo as drivers have pretty much tacked stuff on randomly.]

Sebastian
Frans Klaver Sept. 11, 2014, 3:11 p.m. UTC | #5
On Thu, Sep 11, 2014 at 02:50:50PM +0200, Sebastian Andrzej Siewior wrote:
> On 09/11/2014 02:32 PM, Peter Hurley wrote:
> > On 09/11/2014 07:42 AM, Sebastian Andrzej Siewior wrote:
> >> I also need a watchdog timer for TX since it seems that on omap3 the
> >> DMA engine suddenly forgets to continue with DMA…
> > 
> > One difference I noticed between the omap driver and the 8250 driver is
> > the way modem status interrupts are handled.
> > 
> > The omap driver only checks modem status for the UART_IIR_MSI interrupt type.
> > The 8250 driver checks modem status at every interrupt (other than NO_INT).
> > 
> > I think the UART_MSR_DCTS bit always reflects that the CTS input has changed
> > between reads of the MSR _even if auto CTS is on_. So perhaps the hardware
> > is being stopped by uart_handle_cts_change() when auto CTS is on?
> 
> I doubt that. What I see from a timer debug is that the TX-FIFO level
> is at 0, the DMA transfer for say 1024 bytes start.
> The FIFO is filled to 64bytes and refilled so it doesn't drop below 50.
> At the time of the stall I see that the DMA engine has outstanding
> bytes which it should transfer and the TX FIFO is empty. If hardware
> flow control stops the transfer, I would expect that the DMA engine
> still fills the TX-FIFO until 64 and then waits. But it doesn't.
> Writing bytes into the FIFO leads to bytes beeing sent (and I see them
> on the other side) but the DMA transfer is still on hold. Canceling the
> DMA transfer and re-programming the remaining bytes transfers the
> remaining bytes.
> 
> The odd thing is that I only triggered it with "less file". It doesn't
> happen on regular console interaction or "cat large-file". And it only
> triggers on beagle board xm (omap34xx) and I wasn't able to reproduce
> it on am335x or dra7. The latter shares the same DMA engine as beagle
> board xm.

I can still reproduce it on am335x. I can get out of it as soon as
something else gets written to the console though.

# echo "<3>something" >/dev/kmsg

Frans


> 
> I remember also that I disabled the HW/SW float control just to make
> sure it is not it.
> 
> > 
> > Regards,
> > Peter Hurley
> > 
> > [The UPF_HARD_FLOW thing was pretty much just done for omap even though
> > 8250 already had auto CTS/auto RTS. Serial core hardware flow control support
> > needs a redo as drivers have pretty much tacked stuff on randomly.]
> 
> Sebastian
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Sebastian Andrzej Siewior Sept. 11, 2014, 4:04 p.m. UTC | #6
On 09/11/2014 05:11 PM, Frans Klaver wrote:

> I can still reproduce it on am335x. I can get out of it as soon as
> something else gets written to the console though.
> 
> # echo "<3>something" >/dev/kmsg

Is this to stall it or to get out of it?

> 
> Frans

Sebastian
Frans Klaver Sept. 11, 2014, 5:04 p.m. UTC | #7
On 11 September 2014 18:04:32 CEST, Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>On 09/11/2014 05:11 PM, Frans Klaver wrote:
>
>> I can still reproduce it on am335x. I can get out of it as soon as
>> something else gets written to the console though.
>> 
>> # echo "<3>something" >/dev/kmsg
>
>Is this to stall it or to get out of it?

This is to get out of it. I do this from an ssh connection after the console got stuck. The 3 kind of ensures the message is actually sent to ttyS0.


>
>> 
>> Frans
>
>Sebastian
>
>_______________________________________________
>linux-arm-kernel mailing list
>linux-arm-kernel@lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Sebastian Andrzej Siewior Sept. 12, 2014, 7:23 a.m. UTC | #8
On 09/11/2014 07:04 PM, Frans Klaver wrote:
> On 11 September 2014 18:04:32 CEST, Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>> On 09/11/2014 05:11 PM, Frans Klaver wrote:
>>
>>> I can still reproduce it on am335x. I can get out of it as soon as
>>> something else gets written to the console though.
>>>
>>> # echo "<3>something" >/dev/kmsg
>>
>> Is this to stall it or to get out of it?
> 
> This is to get out of it. I do this from an ssh connection after the console got stuck. The 3 kind of ensures the message is actually sent to ttyS0.

Interesting. This shouldn't do anything. If there is a TX operation in
progress then ->start_tx() will simply return and the xmit buffer will
be send once the current TX operation completes.

Is there anything I can do to reproduce this behavior?
This problem only pops-up if you use DMA. With disabled DMA you don't
see this, right?

Sebastian
Sebastian Andrzej Siewior Sept. 12, 2014, 9:51 a.m. UTC | #9
On 09/12/2014 11:40 AM, Frans Klaver wrote:

> I'm not sure. I just reproduced this on a boneblack, using your uart_v9
> branch.
> 
>> This problem only pops-up if you use DMA. With disabled DMA you don't
>> see this, right?
> 
> I get the lockup both with and without DMA enabled. Here's the 8250
> config I use. Our full .config is attached in case it may provide
> something relevant.

Hmm. I have a bone black here. Can you tell what you did to get this
lockup? The port configuration (unless 115200 8N1) and what you did to
get this lockup.

> Something that may also help: when I have a lockup on the boneblack, dma
> is enabled and something is written to console like I described earlier,
> I get the following bad irq:

> Full dmesg is also attached.

This one should be stuffed by this:
	"[RFC] ARM: edma: unconditionally ack the error interrupt"
	https://lkml.org/lkml/2014/9/10/714

> Hope you find something useful in there.

> Thanks,
> Frans

Sebastian
Frans Klaver Sept. 12, 2014, 10:28 a.m. UTC | #10
On Fri, Sep 12, 2014 at 11:51:22AM +0200, Sebastian Andrzej Siewior wrote:
> On 09/12/2014 11:40 AM, Frans Klaver wrote:
> 
> > I'm not sure. I just reproduced this on a boneblack, using your uart_v9
> > branch.
> > 
> >> This problem only pops-up if you use DMA. With disabled DMA you don't
> >> see this, right?
> > 
> > I get the lockup both with and without DMA enabled. Here's the 8250
> > config I use. Our full .config is attached in case it may provide
> > something relevant.
> 
> Hmm. I have a bone black here. Can you tell what you did to get this
> lockup? The port configuration (unless 115200 8N1) and what you did to
> get this lockup.

port config is 115200 8N1. I don't recall doing anything special. I
boot, login, less file and get a lock.


> > Something that may also help: when I have a lockup on the boneblack, dma
> > is enabled and something is written to console like I described earlier,
> > I get the following bad irq:
> 
> > Full dmesg is also attached.
> 
> This one should be stuffed by this:
> 	"[RFC] ARM: edma: unconditionally ack the error interrupt"
> 	https://lkml.org/lkml/2014/9/10/714

OK, that makes the console usable again after I write something to kmsg.

> > Hope you find something useful in there.
> 
> > Thanks,
> > Frans
> 
> Sebastian
Sebastian Andrzej Siewior Sept. 15, 2014, 4:42 p.m. UTC | #11
On 09/12/2014 12:28 PM, Frans Klaver wrote:
> port config is 115200 8N1. I don't recall doing anything special. I
> boot, login, less file and get a lock.

So I booted my mini Debian 7.6 (basic system + openssh) on my beagle
bone black which is:

[    0.000000] AM335X ES2.0 (neon )

configured a console, login, invoked "less file". The file was shown, I
hit on the space key so less shows me more of the file. No lock-up.
I tried booting via NFS and MMC. I tried various files with less.

My dot config is here
	https://breakpoint.cc/config-am335x-bb.txt.xz

If there is nothing specific to the file you do less on I have no idea
what else it could if it is not the config.

Sebastian
Frans Klaver Sept. 16, 2014, 9:05 a.m. UTC | #12
On Mon, Sep 15, 2014 at 06:42:04PM +0200, Sebastian Andrzej Siewior wrote:
> On 09/12/2014 12:28 PM, Frans Klaver wrote:
> > port config is 115200 8N1. I don't recall doing anything special. I
> > boot, login, less file and get a lock.
> 
> So I booted my mini Debian 7.6 (basic system + openssh) on my beagle
> bone black which is:
> 
> [    0.000000] AM335X ES2.0 (neon )
> 
> configured a console, login, invoked "less file". The file was shown, I
> hit on the space key so less shows me more of the file. No lock-up.
> I tried booting via NFS and MMC. I tried various files with less.
> 
> My dot config is here
> 	https://breakpoint.cc/config-am335x-bb.txt.xz
> 
> If there is nothing specific to the file you do less on I have no idea
> what else it could if it is not the config.

I'll test your config and go through the differences.

Thanks,
Frans
Frans Klaver Sept. 16, 2014, 12:42 p.m. UTC | #13
On Tue, Sep 16, 2014 at 11:05:40AM +0200, Frans Klaver wrote:
> On Mon, Sep 15, 2014 at 06:42:04PM +0200, Sebastian Andrzej Siewior wrote:
> > On 09/12/2014 12:28 PM, Frans Klaver wrote:
> > > port config is 115200 8N1. I don't recall doing anything special. I
> > > boot, login, less file and get a lock.
> > 
> > So I booted my mini Debian 7.6 (basic system + openssh) on my beagle
> > bone black which is:
> > 
> > [    0.000000] AM335X ES2.0 (neon )
> > 
> > configured a console, login, invoked "less file". The file was shown, I
> > hit on the space key so less shows me more of the file. No lock-up.
> > I tried booting via NFS and MMC. I tried various files with less.
> > 
> > My dot config is here
> > 	https://breakpoint.cc/config-am335x-bb.txt.xz
> > 
> > If there is nothing specific to the file you do less on I have no idea
> > what else it could if it is not the config.
> 
> I'll test your config and go through the differences.

So far it looks like it isn't in the config. I've tested both your and
my config on debian wheezy on a boneblack. No lock ups.

Less doesn't fill my entire screen, so it looks a bit funky when
scrolling, but I'm not sure if that's related to the driver.

I'll have to look further for things we did that may have impact here.

> Thanks,
> Frans
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Frans Klaver Sept. 16, 2014, 2:23 p.m. UTC | #14
On Tue, Sep 16, 2014 at 02:42:00PM +0200, Frans Klaver wrote:
> On Tue, Sep 16, 2014 at 11:05:40AM +0200, Frans Klaver wrote:
> > On Mon, Sep 15, 2014 at 06:42:04PM +0200, Sebastian Andrzej Siewior wrote:
> > > If there is nothing specific to the file you do less on I have no idea
> > > what else it could if it is not the config.
> > 
> > I'll test your config and go through the differences.
> 
> So far it looks like it isn't in the config. I've tested both your and
> my config on debian wheezy on a boneblack. No lock ups.

No, skip that. It is in the config. I'll hunt it down.
Frans Klaver Sept. 17, 2014, 10:28 a.m. UTC | #15
Hi,

Yesterday's testing was a bit messy. So here goes again.

On Mon, Sep 15, 2014 at 06:42:04PM +0200, Sebastian Andrzej Siewior wrote:
> On 09/12/2014 12:28 PM, Frans Klaver wrote:
> > port config is 115200 8N1. I don't recall doing anything special. I
> > boot, login, less file and get a lock.
> 
> So I booted my mini Debian 7.6 (basic system + openssh) on my beagle
> bone black which is:
> 
> [    0.000000] AM335X ES2.0 (neon )

Mine's the same.

> configured a console, login, invoked "less file". The file was shown, I
> hit on the space key so less shows me more of the file. No lock-up.
> I tried booting via NFS and MMC. I tried various files with less.
> 
> My dot config is here
> 	https://breakpoint.cc/config-am335x-bb.txt.xz
> 
> If there is nothing specific to the file you do less on I have no idea
> what else it could if it is not the config.

It could be environmental. I have three test cases right now. Two of
them on the beagle bone black, the third on our custom am335x based
platform.

- All test cases run the same kernel built from uart_v10-pre1.
- For the black, the board, dtb, and u-boot environment are equal for
  the test cases.

- Bone Black: Debian 7.5
  Login, "less file" doesn't lock up. Scrolling down looks sensible.
  Scrolling up leaves me with a crooked display, provided the minicom
  window is more than 24 lines high. Condensed example:

  Normal less looks like:
  
	Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
	eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad
	minim veniam, quis nostrud exercitation ullamco laboris nisi ut
	aliquip ex ea commodo consequat. Duis aute irure dolor in
	:

  While after scrolling up it looks like

	minim veniam, quis nostrud exercitation ullamco laboris nisi ut
	aliquip ex ea commodo consequat. Duis aute irure dolor in
	:

	Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
	eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad

  vi works sensibly, but only occupies part of the total screen estate
  in minicom. After quitting, minicom doesn't use the rest of the screen
  estate anymore.

  After running vi, less doesn't show the weird scrolling behavior
  anymore, since the console has just been limited to 24x80.

- Bone Black: Yocto poky, core-image-minimal
  Login, "less file" locks up, doesn't show anything. I can exit using
  Ctrl-C.

  vi runs normally, only occupies part of the total screen estate in
  minicom. After quitting, a weird character shows up (typically I see
  ÿ there), but minicom can use the rest of the screen estate again.
  If we disregard the odd character, this is much like the behavior we
  have on the omap-serial driver.

- Custom board: Yocto poky, custom image
  Login, "less file" locks up, showing only "ÿ" in the top left corner
  of the screen. Can get out of there by having something dumped through
  /dev/kmsg.

  vi: see "Bone Black: Yocto poky, core-image-minimal"

Having it summed up like this, I think we're back at ncurses and its
interaction with the serial driver.

Hope this helps. Thanks for your effort so far,
Frans
Sebastian Andrzej Siewior Sept. 17, 2014, 4:34 p.m. UTC | #16
On 09/11/2014 01:42 PM, Sebastian Andrzej Siewior wrote:
>> We should add hooks like tx_dma and rx_dma to struct uart_8250_dma so
>> that the probe drivers can replace serial8250_tx_dma and
>> seria8250_rx_dma, like I think Alan already suggested.
> 
> Okay. Wasn't aware that Alan already suggested that.
> I also need a watchdog timer for TX since it seems that on omap3 the
> DMA engine suddenly forgets to continue with DMA…
> 
> If this is really what we want, I would need to refactor a few things…
> 
>> Let's keep serial8250_tx_dma/rx_dma as the default, and not add any
>> quirks to them. Only if there is a very common case should it be
>> handled in those. The case of RX req needing to be sent before data in
>> FIFO maybe one of those, but I'm no sure.
> 
> keep in mind that both (RX & TX bugs/hacks) need also a bit of handling
> in the 8250-core so it works together (like the tx_err member so we
> fall back to manual xmit)

Done. I've kept the RX workarounds in the 8250_dma and moved the TX
part into the omap driver.
I needed to add the 8250_core pieces of patch #10 [0]. Now If you say,
couldn't this done in an other way then I could move the RX workarounds
pieces to the omap driver as well as the interrupt routine. Any
preferences?

[0] [PATCH 10/16] tty: serial: 8250_dma: optimize the xmit path due to
    UART_BUG_DMA_TX

Sebastian
Heikki Krogerus Sept. 19, 2014, 10:22 a.m. UTC | #17
On Wed, Sep 17, 2014 at 06:34:45PM +0200, Sebastian Andrzej Siewior wrote:
> On 09/11/2014 01:42 PM, Sebastian Andrzej Siewior wrote:
> >> We should add hooks like tx_dma and rx_dma to struct uart_8250_dma so
> >> that the probe drivers can replace serial8250_tx_dma and
> >> seria8250_rx_dma, like I think Alan already suggested.
> > 
> > Okay. Wasn't aware that Alan already suggested that.
> > I also need a watchdog timer for TX since it seems that on omap3 the
> > DMA engine suddenly forgets to continue with DMA…
> > 
> > If this is really what we want, I would need to refactor a few things…
> > 
> >> Let's keep serial8250_tx_dma/rx_dma as the default, and not add any
> >> quirks to them. Only if there is a very common case should it be
> >> handled in those. The case of RX req needing to be sent before data in
> >> FIFO maybe one of those, but I'm no sure.
> > 
> > keep in mind that both (RX & TX bugs/hacks) need also a bit of handling
> > in the 8250-core so it works together (like the tx_err member so we
> > fall back to manual xmit)
> 
> Done. I've kept the RX workarounds in the 8250_dma and moved the TX
> part into the omap driver.
> I needed to add the 8250_core pieces of patch #10 [0]. Now If you say,
> couldn't this done in an other way then I could move the RX workarounds
> pieces to the omap driver as well as the interrupt routine. Any
> preferences?
> 
> [0] [PATCH 10/16] tty: serial: 8250_dma: optimize the xmit path due to
>     UART_BUG_DMA_TX

Couldn't you just replace the handle_irq with a custom irq routine in
the omap driver like you did with set_termios? Where you would do
those tricks and/or call serial8250_handle_irq()?

The 8250_core changes in that patch #10 only modify
serial8250_handle_irg right?


Cheers,
Sebastian Andrzej Siewior Sept. 19, 2014, 10:58 a.m. UTC | #18
On 09/19/2014 12:22 PM, Heikki Krogerus wrote:
> Couldn't you just replace the handle_irq with a custom irq routine in
> the omap driver like you did with set_termios? Where you would do
> those tricks and/or call serial8250_handle_irq()?

Tricks within serial8250_handle_irq(), see [0]. It is not a lot but
still. I could provide my own handle irq, just asking what you would
prefer.

[0]
http://git.breakpoint.cc/cgit/bigeasy/linux.git/commit/?h=uart_v10_pre2&id=f26f161d998ee4a84a0aa6ddff69a435c25f204d

> The 8250_core changes in that patch #10 only modify
> serial8250_handle_irg right?

Correct. However there is another change due to the RX_DMA_BUG. A while
ago you said that this RX_DMA_BUG thing might be something that other
SoC could use, too.
If you ask me now for my own irq routine it would make sense to move RX
bug handling into omap specific code as well.

> Cheers,

Sebastian
Peter Hurley Sept. 19, 2014, 11:25 a.m. UTC | #19
On 09/19/2014 06:58 AM, Sebastian Andrzej Siewior wrote:
> On 09/19/2014 12:22 PM, Heikki Krogerus wrote:
>> Couldn't you just replace the handle_irq with a custom irq routine in
>> the omap driver like you did with set_termios? Where you would do
>> those tricks and/or call serial8250_handle_irq()?
> 
> Tricks within serial8250_handle_irq(), see [0]. It is not a lot but
> still. I could provide my own handle irq, just asking what you would
> prefer.
> 
> [0]
> http://git.breakpoint.cc/cgit/bigeasy/linux.git/commit/?h=uart_v10_pre2&id=f26f161d998ee4a84a0aa6ddff69a435c25f204d
> 
>> The 8250_core changes in that patch #10 only modify
>> serial8250_handle_irg right?
> 
> Correct. However there is another change due to the RX_DMA_BUG. A while
> ago you said that this RX_DMA_BUG thing might be something that other
> SoC could use, too.
> If you ask me now for my own irq routine it would make sense to move RX
> bug handling into omap specific code as well.

I'm not excited at the prospect of an omap-specific irq handler, especially
if this is just a silicon bug. I think it will create and encourage
unnecessary code variation in the 8250 driver. The inertia of an
omap-specific irq handler will mean it will probable stay forever. Suppose
this tx dma bug is later discovered to be fixable inline (rather than by
state), then we'll be stuck with an irq handler that no one will want to
integrate.

Regards,
Peter Hurley
Sebastian Andrzej Siewior Sept. 21, 2014, 8:41 p.m. UTC | #20
* Frans Klaver | 2014-09-17 12:28:12 [+0200]:

>- Bone Black: Yocto poky, core-image-minimal
>  Login, "less file" locks up, doesn't show anything. I can exit using
>  Ctrl-C.

So I have the same with my and the serial-omap driver. No difference
here. The trace looks like this:
|           <idle>-0     [000] d.h.   444.393585: serial8250_handle_irq: iir cc lsr 61
|           <idle>-0     [000] d.h.   444.393605: serial8250_rx_chars: get 0d
received the enter key

|           <idle>-0     [000] d.h.   444.393609: serial8250_rx_chars: insert d lsr 61
|           <idle>-0     [000] d.h.   444.393614: uart_insert_char: 1
|           <idle>-0     [000] d.h.   444.393617: uart_insert_char: 2
|           <idle>-0     [000] dnh.   444.393636: serial8250_tx_chars: empty
|      kworker/0:2-753   [000] d...   444.393686: serial8250_start_tx: empty?1
|      kworker/0:2-753   [000] d.h.   444.393699: serial8250_handle_irq: iir c2 lsr 60
|      kworker/0:2-753   [000] d.h.   444.393705: serial8250_tx_chars: empty
|               sh-1042  [000] d...   444.393822: serial8250_start_tx: empty?1
|               sh-1042  [000] d.h.   444.393836: serial8250_handle_irq: iir c2 lsr 60
|               sh-1042  [000] d.h.   444.393842: serial8250_tx_chars: empty
|               sh-1042  [000] d...   444.393855: serial8250_start_tx: empty?0
|               sh-1042  [000] d.h.   444.393863: serial8250_handle_irq: iir c2 lsr 60
|               sh-1042  [000] d.h.   444.393867: serial8250_tx_chars: put 0d
|               sh-1042  [000] d.h.   444.393871: serial8250_tx_chars: put 0a

shell responded with "\r\n" which I see and then

|               sh-1042  [000] d.h.   444.394057: serial8250_handle_irq: iir c2 lsr 60
|               sh-1042  [000] d.h.   444.394065: serial8250_tx_chars: empty

nothing more. less isn't sending data for some reason. Exactly the same
thing happens in a Debian environment except that it continues:
…
|             bash-2468  [000] d.h.    99.657899: serial8250_tx_chars: put 0a
|             bash-2468  [000] d.h.    99.658089: serial8250_handle_irq: iir c2 lsr 60
|             bash-2468  [000] d.h.    99.658095: serial8250_tx_chars: empty
=>
|             less-2474  [000] d...    99.696038: serial8250_start_tx: empty?0
|             less-2474  [000] d.h.    99.696069: serial8250_handle_irq: iir c2 lsr 60
|             less-2474  [000] d.h.    99.696078: serial8250_tx_chars: put 1b
|             less-2474  [000] d.h.    99.696082: serial8250_tx_chars: put 5b
|             less-2474  [000] d.h.    99.696085: serial8250_tx_chars: put 3f
|             less-2474  [000] d.h.    99.696087: serial8250_tx_chars: put 31

It has to be something about the environment. Booting Debian and chroot
into this RFS and less works perfectly. But since it behaves like that
with both drivers, I guess the problem is somewhere else…

>  vi runs normally, only occupies part of the total screen estate in
>  minicom. After quitting, a weird character shows up (typically I see
>  ÿ there), but minicom can use the rest of the screen estate again.
>  If we disregard the odd character, this is much like the behavior we
>  have on the omap-serial driver.
>- Custom board: Yocto poky, custom image
>  Login, "less file" locks up, showing only "ÿ" in the top left corner
>  of the screen. Can get out of there by having something dumped through
>  /dev/kmsg.

I managed to run into something like that with vi on dra7 and with
little more patience on am335x as well by "vi *" and then ":n".

This gets fixed indeed by writing. Hours of debugging and a lot of hair
less later: the yocto RFS calls set_termios quite a lot. This includes
changing the baudrate (not by yocto but the driver sets it to 0 and then
to the requested one) and this seems to be responsible for the "bad
bytes". I haven't figured out yet I don't see this with omap-serial.
Even worse: If this (set_termios()) happens while the DMA is still
active then it might stall it. A write into the FIFO seems to fix it and
this is where your "echo >/dev/kmsg" fixes things.
If I delay the restore_registers part of set_termios() until TX-DMA is
complete then it seems that the TX-DMA stall does not tall anymore.

>Having it summed up like this, I think we're back at ncurses and its
>interaction with the serial driver.
>
>Hope this helps. Thanks for your effort so far,
>Frans

Sebastian
Heikki Krogerus Sept. 22, 2014, 7:46 a.m. UTC | #21
On Fri, Sep 19, 2014 at 12:58:44PM +0200, Sebastian Andrzej Siewior wrote:
> On 09/19/2014 12:22 PM, Heikki Krogerus wrote:
> > Couldn't you just replace the handle_irq with a custom irq routine in
> > the omap driver like you did with set_termios? Where you would do
> > those tricks and/or call serial8250_handle_irq()?
> 
> Tricks within serial8250_handle_irq(), see [0]. It is not a lot but
> still. I could provide my own handle irq, just asking what you would
> prefer.
> 
> [0]
> http://git.breakpoint.cc/cgit/bigeasy/linux.git/commit/?h=uart_v10_pre2&id=f26f161d998ee4a84a0aa6ddff69a435c25f204d
> 
> > The 8250_core changes in that patch #10 only modify
> > serial8250_handle_irg right?
> 
> Correct. However there is another change due to the RX_DMA_BUG. A while
> ago you said that this RX_DMA_BUG thing might be something that other
> SoC could use, too.
> If you ask me now for my own irq routine it would make sense to move RX
> bug handling into omap specific code as well.

Well, there are no other SoCs at the moment that would need it, and
it's actually possible that there never will be. So yes, just handle
that also in the omap specific code.


Thanks,
Frans Klaver Sept. 22, 2014, 9:28 a.m. UTC | #22
On Sun, Sep 21, 2014 at 10:41:00PM +0200, Sebastian Andrzej Siewior wrote:
> * Frans Klaver | 2014-09-17 12:28:12 [+0200]:
> 
> >- Bone Black: Yocto poky, core-image-minimal
> >  Login, "less file" locks up, doesn't show anything. I can exit using
> >  Ctrl-C.
> 
> So I have the same with my and the serial-omap driver. No difference
> here. The trace looks like this:
> |           <idle>-0     [000] d.h.   444.393585: serial8250_handle_irq: iir cc lsr 61
> |           <idle>-0     [000] d.h.   444.393605: serial8250_rx_chars: get 0d
> received the enter key
> 
> |           <idle>-0     [000] d.h.   444.393609: serial8250_rx_chars: insert d lsr 61
> |           <idle>-0     [000] d.h.   444.393614: uart_insert_char: 1
> |           <idle>-0     [000] d.h.   444.393617: uart_insert_char: 2
> |           <idle>-0     [000] dnh.   444.393636: serial8250_tx_chars: empty
> |      kworker/0:2-753   [000] d...   444.393686: serial8250_start_tx: empty?1
> |      kworker/0:2-753   [000] d.h.   444.393699: serial8250_handle_irq: iir c2 lsr 60
> |      kworker/0:2-753   [000] d.h.   444.393705: serial8250_tx_chars: empty
> |               sh-1042  [000] d...   444.393822: serial8250_start_tx: empty?1
> |               sh-1042  [000] d.h.   444.393836: serial8250_handle_irq: iir c2 lsr 60
> |               sh-1042  [000] d.h.   444.393842: serial8250_tx_chars: empty
> |               sh-1042  [000] d...   444.393855: serial8250_start_tx: empty?0
> |               sh-1042  [000] d.h.   444.393863: serial8250_handle_irq: iir c2 lsr 60
> |               sh-1042  [000] d.h.   444.393867: serial8250_tx_chars: put 0d
> |               sh-1042  [000] d.h.   444.393871: serial8250_tx_chars: put 0a
> 
> shell responded with "\r\n" which I see and then
> 
> |               sh-1042  [000] d.h.   444.394057: serial8250_handle_irq: iir c2 lsr 60
> |               sh-1042  [000] d.h.   444.394065: serial8250_tx_chars: empty
> 
> nothing more. less isn't sending data for some reason. Exactly the same
> thing happens in a Debian environment except that it continues:
> …
> |             bash-2468  [000] d.h.    99.657899: serial8250_tx_chars: put 0a
> |             bash-2468  [000] d.h.    99.658089: serial8250_handle_irq: iir c2 lsr 60
> |             bash-2468  [000] d.h.    99.658095: serial8250_tx_chars: empty
> =>
> |             less-2474  [000] d...    99.696038: serial8250_start_tx: empty?0
> |             less-2474  [000] d.h.    99.696069: serial8250_handle_irq: iir c2 lsr 60
> |             less-2474  [000] d.h.    99.696078: serial8250_tx_chars: put 1b
> |             less-2474  [000] d.h.    99.696082: serial8250_tx_chars: put 5b
> |             less-2474  [000] d.h.    99.696085: serial8250_tx_chars: put 3f
> |             less-2474  [000] d.h.    99.696087: serial8250_tx_chars: put 31
> 
> It has to be something about the environment. Booting Debian and chroot
> into this RFS and less works perfectly. But since it behaves like that
> with both drivers, I guess the problem is somewhere else…
> 
> >  vi runs normally, only occupies part of the total screen estate in
> >  minicom. After quitting, a weird character shows up (typically I see
> >  ÿ there), but minicom can use the rest of the screen estate again.
> >  If we disregard the odd character, this is much like the behavior we
> >  have on the omap-serial driver.
> >- Custom board: Yocto poky, custom image
> >  Login, "less file" locks up, showing only "ÿ" in the top left corner
> >  of the screen. Can get out of there by having something dumped through
> >  /dev/kmsg.
> 
> I managed to run into something like that with vi on dra7 and with
> little more patience on am335x as well by "vi *" and then ":n".
> 
> This gets fixed indeed by writing. Hours of debugging and a lot of hair
> less later: the yocto RFS calls set_termios quite a lot. This includes
> changing the baudrate (not by yocto but the driver sets it to 0 and then
> to the requested one) and this seems to be responsible for the "bad
> bytes". I haven't figured out yet I don't see this with omap-serial.
> Even worse: If this (set_termios()) happens while the DMA is still
> active then it might stall it. A write into the FIFO seems to fix it and
> this is where your "echo >/dev/kmsg" fixes things.
> If I delay the restore_registers part of set_termios() until TX-DMA is
> complete then it seems that the TX-DMA stall does not tall anymore.

Wow, thanks for your work here. This does indeed sound hard to trap.

I guess then we'd still have to answer the question why the yocto build
calls set_termios() so often, but that's not on you then. Did you notice
it even changing settings? We might want to fix this even if the kernel
should be able to cope.

Thanks again,
Frans
Peter Hurley Sept. 23, 2014, 5:03 p.m. UTC | #23
On 09/21/2014 04:41 PM, Sebastian Andrzej Siewior wrote:
> * Frans Klaver | 2014-09-17 12:28:12 [+0200]:
> 
>> - Bone Black: Yocto poky, core-image-minimal
>>  Login, "less file" locks up, doesn't show anything. I can exit using
>>  Ctrl-C.
> 
> So I have the same with my and the serial-omap driver. No difference
> here. The trace looks like this:
> |           <idle>-0     [000] d.h.   444.393585: serial8250_handle_irq: iir cc lsr 61
> |           <idle>-0     [000] d.h.   444.393605: serial8250_rx_chars: get 0d
> received the enter key
> 
> |           <idle>-0     [000] d.h.   444.393609: serial8250_rx_chars: insert d lsr 61
> |           <idle>-0     [000] d.h.   444.393614: uart_insert_char: 1
> |           <idle>-0     [000] d.h.   444.393617: uart_insert_char: 2
> |           <idle>-0     [000] dnh.   444.393636: serial8250_tx_chars: empty
> |      kworker/0:2-753   [000] d...   444.393686: serial8250_start_tx: empty?1
> |      kworker/0:2-753   [000] d.h.   444.393699: serial8250_handle_irq: iir c2 lsr 60
> |      kworker/0:2-753   [000] d.h.   444.393705: serial8250_tx_chars: empty
> |               sh-1042  [000] d...   444.393822: serial8250_start_tx: empty?1
> |               sh-1042  [000] d.h.   444.393836: serial8250_handle_irq: iir c2 lsr 60
> |               sh-1042  [000] d.h.   444.393842: serial8250_tx_chars: empty
> |               sh-1042  [000] d...   444.393855: serial8250_start_tx: empty?0
> |               sh-1042  [000] d.h.   444.393863: serial8250_handle_irq: iir c2 lsr 60
> |               sh-1042  [000] d.h.   444.393867: serial8250_tx_chars: put 0d
> |               sh-1042  [000] d.h.   444.393871: serial8250_tx_chars: put 0a
> 
> shell responded with "\r\n" which I see and then
> 
> |               sh-1042  [000] d.h.   444.394057: serial8250_handle_irq: iir c2 lsr 60
> |               sh-1042  [000] d.h.   444.394065: serial8250_tx_chars: empty
> 
> nothing more. less isn't sending data for some reason. Exactly the same
> thing happens in a Debian environment except that it continues:
> …
> |             bash-2468  [000] d.h.    99.657899: serial8250_tx_chars: put 0a
> |             bash-2468  [000] d.h.    99.658089: serial8250_handle_irq: iir c2 lsr 60
> |             bash-2468  [000] d.h.    99.658095: serial8250_tx_chars: empty
> =>
> |             less-2474  [000] d...    99.696038: serial8250_start_tx: empty?0
> |             less-2474  [000] d.h.    99.696069: serial8250_handle_irq: iir c2 lsr 60
> |             less-2474  [000] d.h.    99.696078: serial8250_tx_chars: put 1b
> |             less-2474  [000] d.h.    99.696082: serial8250_tx_chars: put 5b
> |             less-2474  [000] d.h.    99.696085: serial8250_tx_chars: put 3f
> |             less-2474  [000] d.h.    99.696087: serial8250_tx_chars: put 31
> 
> It has to be something about the environment. Booting Debian and chroot
> into this RFS and less works perfectly. But since it behaves like that
> with both drivers, I guess the problem is somewhere else…
> 
>>  vi runs normally, only occupies part of the total screen estate in
>>  minicom. After quitting, a weird character shows up (typically I see
>>  ÿ there), but minicom can use the rest of the screen estate again.
>>  If we disregard the odd character, this is much like the behavior we
>>  have on the omap-serial driver.
>> - Custom board: Yocto poky, custom image
>>  Login, "less file" locks up, showing only "ÿ" in the top left corner
>>  of the screen. Can get out of there by having something dumped through
>>  /dev/kmsg.
> 
> I managed to run into something like that with vi on dra7 and with
> little more patience on am335x as well by "vi *" and then ":n".
> 
> This gets fixed indeed by writing. Hours of debugging and a lot of hair
> less later: the yocto RFS calls set_termios quite a lot.

readline() does this; it 'saves' the caller's termios, sets termios
for non-canonical reads, reads one char, and 'restores' the caller's
termios.

> This includes
> changing the baudrate (not by yocto but the driver sets it to 0 and then
> to the requested one) and this seems to be responsible for the "bad
> bytes". I haven't figured out yet I don't see this with omap-serial.
> Even worse: If this (set_termios()) happens while the DMA is still
> active then it might stall it. A write into the FIFO seems to fix it and
> this is where your "echo >/dev/kmsg" fixes things.
> If I delay the restore_registers part of set_termios() until TX-DMA is
> complete then it seems that the TX-DMA stall does not tall anymore.

The tty core calls the driver's wait_until_sent() method before changing
the termios (if TCSADRAIN is used for tcsetattr(), which I think for readline()
it does).

But DMA is cheating if the UART driver's tx_empty() method is saying the
transmitter is empty while TX DMA is still running.

Regards,
Peter Hurley
Sebastian Andrzej Siewior Sept. 24, 2014, 7:53 a.m. UTC | #24
* Peter Hurley | 2014-09-23 13:03:51 [-0400]:

>readline() does this; it 'saves' the caller's termios, sets termios
>for non-canonical reads, reads one char, and 'restores' the caller's
>termios.

interresting, thanks. I guess I would need to opimize this a little so
the baudrate isn't going to 0 and back to the requested baudrate.

>The tty core calls the driver's wait_until_sent() method before changing
>the termios (if TCSADRAIN is used for tcsetattr(), which I think for readline()
>it does).

The interresting difference is that when I take the yocto RFS and chroot
into from Debian then I don't this problem. Not sure if this is really
readline or something else…

>But DMA is cheating if the UART driver's tx_empty() method is saying the
>transmitter is empty while TX DMA is still running.
This shouldn't be the case. But I will check this once I able to.
After TX-DMA is done, "xmit->tail" is updated and port.icount.tx is
incremented. At this time the TX FIFO is still full (up to 64 bytes) and
I set UART_IER_THRI to wait until TX FIFO is empty so I can disable
runtime-pm. Therefore I would assume LSR does not say BOTH_EMPTY until
the FIFO is empty.

>Regards,
>Peter Hurley

Sebastian
Sebastian Andrzej Siewior Sept. 24, 2014, 7:56 a.m. UTC | #25
* Frans Klaver | 2014-09-22 11:28:54 [+0200]:

>
>Wow, thanks for your work here. This does indeed sound hard to trap.
>
>I guess then we'd still have to answer the question why the yocto build
>calls set_termios() so often, but that's not on you then. Did you notice
>it even changing settings? We might want to fix this even if the kernel
>should be able to cope.

Yeah. I don't care if this is yocto's fault or not. If it is possible to
stop the DMA transfer from userland with whatever method then it needs to
be fixed in kernel so that this does happen.

>Thanks again,
>Frans

Sebastian
Sebastian Andrzej Siewior Sept. 25, 2014, 9:24 a.m. UTC | #26
* Heikki Krogerus | 2014-09-22 10:46:05 [+0300]:

>Well, there are no other SoCs at the moment that would need it, and
>it's actually possible that there never will be. So yes, just handle
>that also in the omap specific code.

Okay. So let me move the irq routine and the workarounds into omap then.

>Thanks,
>

Sebastian
Sebastian Andrzej Siewior Sept. 25, 2014, 10:42 a.m. UTC | #27
* Sebastian Andrzej Siewior | 2014-09-24 09:53:46 [+0200]:

>* Peter Hurley | 2014-09-23 13:03:51 [-0400]:
>
>>But DMA is cheating if the UART driver's tx_empty() method is saying the
>>transmitter is empty while TX DMA is still running.
>This shouldn't be the case. But I will check this once I able to.

I added 

|#define BOTH_EMPTY      (UART_LSR_TEMT | UART_LSR_THRE)
|    trace_printk("delay <%d>\n", (lsr & BOTH_EMPTY) == BOTH_EMPTY ? 1 : 0);

in my set_termios() and the trace shows
|              vi-949   [000] d...    70.477002: omap8250_restore_regs: delay <0>

so no, it does not wait until TX FIFO is empty. It looks like it uses
TCSANOW instead of TCSADRAIN. And since this looks "legal" I will delay
it until TX-DMA is complete because it is known to stall the DMA operation.

>>Regards,
>>Peter Hurley

Sebastian
Peter Hurley Sept. 25, 2014, 11:31 a.m. UTC | #28
On 09/25/2014 06:42 AM, Sebastian Andrzej Siewior wrote:
> * Sebastian Andrzej Siewior | 2014-09-24 09:53:46 [+0200]:
> 
>> * Peter Hurley | 2014-09-23 13:03:51 [-0400]:
>>
>>> But DMA is cheating if the UART driver's tx_empty() method is saying the
>>> transmitter is empty while TX DMA is still running.
>> This shouldn't be the case. But I will check this once I able to.
> 
> I added 
> 
> |#define BOTH_EMPTY      (UART_LSR_TEMT | UART_LSR_THRE)
> |    trace_printk("delay <%d>\n", (lsr & BOTH_EMPTY) == BOTH_EMPTY ? 1 : 0);
> 
> in my set_termios() and the trace shows
> |              vi-949   [000] d...    70.477002: omap8250_restore_regs: delay <0>
> 
> so no, it does not wait until TX FIFO is empty. It looks like it uses
> TCSANOW instead of TCSADRAIN. And since this looks "legal" I will delay
> it until TX-DMA is complete because it is known to stall the DMA operation.

I just verified that GNU readline6 uses ioctl(TCSETSW, ...) to do the set_termios
(which is the ioctl that libc should use for tcsetattr(TCSADRAIN)).

Maybe this userspace is using a readline()-alike that has a bug by not using the
correct tcsetattr() action?
Or maybe the glibc-equivalent has the bug, and tcsetattr(TCSADRAIN) is not using
ioctl(TCSETSW)?

Regards,
Peter Hurley
Sebastian Andrzej Siewior Sept. 25, 2014, 1:11 p.m. UTC | #29
* Peter Hurley | 2014-09-25 07:31:32 [-0400]:

>I just verified that GNU readline6 uses ioctl(TCSETSW, ...) to do the set_termios
>(which is the ioctl that libc should use for tcsetattr(TCSADRAIN)).
>
>Maybe this userspace is using a readline()-alike that has a bug by not using the
>correct tcsetattr() action?

set_termios() has an opt argument. While doing ":n" in vi I see two invocations
with "opt == 8" which stands for TCSETS.
browsing through vi's code I stumbled upon 

|static void rawmode(void)
|{
…
|    tcsetattr_stdin_TCSANOW(&term_vi);
|}
|int FAST_FUNC tcsetattr_stdin_TCSANOW(const struct termios *tp)
|{
|    return tcsetattr(STDIN_FILENO, TCSANOW, tp);
|}

and this is probably what you meant. There is also
| static void cookmode(void)
| {
|     fflush_all();
|     tcsetattr_stdin_TCSANOW(&term_orig);
| }

However I don't see __tty_perform_flush() in kernel invoked.

>Or maybe the glibc-equivalent has the bug, and tcsetattr(TCSADRAIN) is not using
>ioctl(TCSETSW)?

libc is "GNU C Library 2.20". 

>Regards,
>Peter Hurley

Sebastian
Sebastian Andrzej Siewior Sept. 25, 2014, 3:14 p.m. UTC | #30
* Frans Klaver | 2014-09-22 11:28:54 [+0200]:

>I guess then we'd still have to answer the question why the yocto build
>calls set_termios() so often, but that's not on you then. Did you notice
>it even changing settings? We might want to fix this even if the kernel
>should be able to cope.

could you please test uart_v10_pre3? I dropped a few register sets and
delayed the register update until TX-DMA is complete. I am traveling
currently and have only my boneblack and it passes my limited testing.
If it solves your problems, too then I would address Heikki's latest
comments and prepare the final v10 (and I hope I didn't break beagle
board in between).

>Thanks again,
>Frans

Sebastian
Frans Klaver Sept. 25, 2014, 3:18 p.m. UTC | #31
On 25 September 2014 17:14:03 CEST, Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
>* Frans Klaver | 2014-09-22 11:28:54 [+0200]:
>
>>I guess then we'd still have to answer the question why the yocto
>build
>>calls set_termios() so often, but that's not on you then. Did you
>notice
>>it even changing settings? We might want to fix this even if the
>kernel
>>should be able to cope.
>
>could you please test uart_v10_pre3? I dropped a few register sets and
>delayed the register update until TX-DMA is complete. I am traveling
>currently and have only my boneblack and it passes my limited testing.
>If it solves your problems, too then I would address Heikki's latest
>comments and prepare the final v10 (and I hope I didn't break beagle
>board in between).

That'll be Monday earliest for me. 

>
>>Thanks again,
>>Frans
>
>Sebastian
>
>_______________________________________________
>linux-arm-kernel mailing list
>linux-arm-kernel@lists.infradead.org
>http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Frans Klaver Sept. 29, 2014, 8:50 a.m. UTC | #32
On Thu, Sep 25, 2014 at 05:14:03PM +0200, Sebastian Andrzej Siewior wrote:
> * Frans Klaver | 2014-09-22 11:28:54 [+0200]:
> 
> >I guess then we'd still have to answer the question why the yocto build
> >calls set_termios() so often, but that's not on you then. Did you notice
> >it even changing settings? We might want to fix this even if the kernel
> >should be able to cope.
> 
> could you please test uart_v10_pre3? I dropped a few register sets and
> delayed the register update until TX-DMA is complete. I am traveling
> currently and have only my boneblack and it passes my limited testing.
> If it solves your problems, too then I would address Heikki's latest
> comments and prepare the final v10 (and I hope I didn't break beagle
> board in between).

This version fixes the console things for us. It also increases the
amount of data we can push over the serial port. If I push the data
towards our requirements, we're not there yet. I get the "too much work
for irq" notice still. However, I don't think you'd need to be fixing
that in this series (or at all). We had similar issues there with
omap-serial as well.

As far as I'm concerned, this is

Tested-by: Frans Klaver <frans.klaver@xsens.com>

Thanks,
Frans
Sebastian Andrzej Siewior Sept. 29, 2014, 9:54 a.m. UTC | #33
* Frans Klaver | 2014-09-29 10:50:42 [+0200]:

>This version fixes the console things for us. It also increases the
>amount of data we can push over the serial port. If I push the data
>towards our requirements, we're not there yet. I get the "too much work
>for irq" notice still. However, I don't think you'd need to be fixing
>that in this series (or at all). We had similar issues there with
>omap-serial as well.
>
>As far as I'm concerned, this is
>
>Tested-by: Frans Klaver <frans.klaver@xsens.com>

Thanks a lot.
There is a patch named "ARM: edma: unconditionally ack the error
interrupt". I have the feeling that this is not really required once we
delay set_termios. I couldn't reproduce the bug with beagleblack with my
usual test case.

For your "too much work for irq" problem: Could you add trace_printk()
in tx/rx dma start/complete, and irq routine? The interresting part is
what is the irq routine doing once entered. It might be a condition that
is ignored at first and "acked" later while serving another event. Or it
is really doing something and this is more or less "legal".

>Thanks,
>Frans

Sebastian
Frans Klaver Sept. 29, 2014, 10:30 a.m. UTC | #34
On Mon, Sep 29, 2014 at 11:54:40AM +0200, Sebastian Andrzej Siewior wrote:
> There is a patch named "ARM: edma: unconditionally ack the error
> interrupt". I have the feeling that this is not really required once we
> delay set_termios. I couldn't reproduce the bug with beagleblack with my
> usual test case.

I think so too. I didn't need it either.


> For your "too much work for irq" problem: Could you add trace_printk()
> in tx/rx dma start/complete, and irq routine? The interresting part is
> what is the irq routine doing once entered. It might be a condition that
> is ignored at first and "acked" later while serving another event. Or it
> is really doing something and this is more or less "legal".

I'll have a look at that.

> 
> >Thanks,
> >Frans
> 
> Sebastian
Frans Klaver Sept. 30, 2014, 8:44 a.m. UTC | #35
On Mon, Sep 29, 2014 at 12:30:43PM +0200, Frans Klaver wrote:
> On Mon, Sep 29, 2014 at 11:54:40AM +0200, Sebastian Andrzej Siewior wrote:
> > For your "too much work for irq" problem: Could you add trace_printk()
> > in tx/rx dma start/complete, and irq routine? The interresting part is
> > what is the irq routine doing once entered. It might be a condition that
> > is ignored at first and "acked" later while serving another event. Or it
> > is really doing something and this is more or less "legal".
> 

Here's some trace output I get. I hope branches become clear by the
calls they do.

       uart_test-482   [000] .ns.    17.860139: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
       uart_test-482   [000] .ns.    17.860141: __dma_rx_do_complete: rx_dma(p, 0)
       uart_test-480   [000] ..s.    17.860260: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
       uart_test-480   [000] ..s.    17.860263: __dma_rx_do_complete: rx_dma(p, 0)
       uart_test-478   [000] ..s.    17.860369: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
       uart_test-478   [000] ..s.    17.860372: __dma_rx_do_complete: rx_dma(p, 0)
     kworker/0:1-10    [000] ..s.    17.860508: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
     kworker/0:1-10    [000] ..s.    17.860512: __dma_rx_do_complete: rx_dma(p, 0)
       uart_test-477   [000] ..s.    17.860634: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
       uart_test-477   [000] ..s.    17.860642: __dma_rx_do_complete: rx_dma(p, 0)
       uart_test-477   [000] ..s.    17.860768: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
       uart_test-477   [000] ..s.    17.860772: __dma_rx_do_complete: rx_dma(p, 0)
     kworker/0:1-10    [000] ..s.    17.860900: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
     kworker/0:1-10    [000] ..s.    17.860904: __dma_rx_do_complete: rx_dma(p, 0)
       uart_test-483   [000] dnh.    17.861018: serial8250_interrupt: irq 89
       uart_test-483   [000] dnh.    17.861026: serial8250_interrupt: 89 e
       uart_test-483   [000] .ns.    17.861045: __dma_rx_do_complete: error: 0, uart_bug_dma: 32
       uart_test-483   [000] .ns.    17.861047: __dma_rx_do_complete: rx_dma(p, 0)
       uart_test-479   [000] d.H.    17.861124: serial8250_interrupt: irq 89
       uart_test-479   [000] d.H.    17.861133: serial8250_handle_irq: l1 IIR cc LSR 61
       uart_test-479   [000] d.H.    17.861135: serial8250_handle_irq: rx_dma(up, iir)
       uart_test-479   [000] d.H.    17.861139: serial8250_rx_dma: l1, UART_IIR_RX_TIMEOUT
       uart_test-479   [000] d.H.    17.861147: __dma_rx_do_complete: error: 1, uart_bug_dma: 32
       uart_test-479   [000] d.H.    17.861150: serial8250_handle_irq: rx_chars(up, status)
       uart_test-479   [000] d.H.    17.861190: serial8250_handle_irq: rx_dma(up, 0)
       uart_test-479   [000] d.H.    17.861205: serial8250_interrupt: 89 e
     kworker/0:1-10    [000] dnH.    17.864949: serial8250_interrupt: irq 89

So far so good. We're just entered interrupt where stuff goes awry. The
following pattern repeats over 600 times:

     kworker/0:1-10    [000] dnH.    17.865198: serial8250_handle_irq: l1 IIR cc LSR 61
     kworker/0:1-10    [000] dnH.    17.865254: serial8250_handle_irq: rx_dma(up, iir)
     kworker/0:1-10    [000] dnH.    17.865333: serial8250_rx_dma: l1, UART_IIR_RX_TIMEOUT
     kworker/0:1-10    [000] dnH.    17.865626: __dma_rx_do_complete: error: 1, uart_bug_dma: 32
     kworker/0:1-10    [000] dnH.    17.865747: serial8250_handle_irq: rx_chars(up, status)
     kworker/0:1-10    [000] dnH.    17.868797: serial8250_handle_irq: rx_dma(up, 0)

ending with:

     kworker/0:1-10    [000] dnH.    20.027093: serial8250_interrupt: serial8250: too much work for irq89
     kworker/0:1-10    [000] dnH.    20.027181: serial8250_interrupt: 89 e

This clogs the entire system until I disconnect the communication lines.

So we get an RX timeout. The receiver fifo trigger level was not reached
and 8250_core is left to copy the remaining data. I would expect that if
the trigger level wasn't reached, we wouldn't need to be doing this that
often. On the other hand, could we be trapped reading data from rx
without dma helping us? And how could we resolve this?

Frans
diff mbox

Patch

diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index fbed1636e9c4..09489b391568 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -82,6 +82,9 @@  struct serial8250_config {
 #define UART_BUG_PARITY	(1 << 4)	/* UART mishandles parity if FIFO enabled */
 #define UART_BUG_DMA_RX	(1 << 5)	/* UART needs DMA RX req before there is
 					   data in FIFO */
+#define UART_BUG_DMA_TX	(1 << 6)	/* UART needs one byte in FIFO for
+					   kickstart */
+
 #define PROBE_RSA	(1 << 0)
 #define PROBE_ANY	(~0)
 
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 3674900a1f14..48dc57aad0dd 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -83,6 +83,7 @@  int serial8250_tx_dma(struct uart_8250_port *p)
 	struct uart_8250_dma		*dma = p->dma;
 	struct circ_buf			*xmit = &p->port.state->xmit;
 	struct dma_async_tx_descriptor	*desc;
+	unsigned int			skip_byte = 0;
 	int ret;
 
 	if (uart_tx_stopped(&p->port) || dma->tx_running ||
@@ -91,10 +92,40 @@  int serial8250_tx_dma(struct uart_8250_port *p)
 
 	dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
 
+	if (p->bugs & UART_BUG_DMA_TX) {
+		u8 tx_lvl;
+
+		/*
+		 * We need to put the first byte into the FIFO in order to start
+		 * the DMA transfer. For transfers smaller than four bytes we
+		 * don't bother doing DMA at all. It seem not matter if there
+		 * are still bytes in the FIFO from the last transfer (in case
+		 * we got here directly from __dma_tx_complete()). Bytes leaving
+		 * the FIFO seem not to trigger the DMA transfer. It is really
+		 * the byte that we put into the FIFO.
+		 * If the FIFO is already full then we most likely got here from
+		 * __dma_tx_complete(). And this means the DMA engine just
+		 * completed its work. We don't have to wait the complete 86us
+		 * at 115200,8n1 but around 60us (not to mention lower
+		 * baudrates). So in that case we take the interrupt and try
+		 * again with an empty FIFO.
+		 */
+		tx_lvl = serial_in(p, UART_OMAP_TX_LVL);
+		if (tx_lvl == p->tx_loadsz) {
+			ret = -EBUSY;
+			goto err;
+		}
+		if (dma->tx_size < 4) {
+			ret = -EINVAL;
+			goto err;
+		}
+		skip_byte = 1;
+	}
+
 	desc = dmaengine_prep_slave_single(dma->txchan,
-					   dma->tx_addr + xmit->tail,
-					   dma->tx_size, DMA_MEM_TO_DEV,
-					   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			dma->tx_addr + xmit->tail + skip_byte,
+			dma->tx_size - skip_byte, DMA_MEM_TO_DEV,
+			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc) {
 		ret = -EBUSY;
 		goto err;
@@ -118,6 +149,8 @@  int serial8250_tx_dma(struct uart_8250_port *p)
 			serial_out(p, UART_IER, p->ier);
 		}
 	}
+	if (skip_byte)
+		serial_out(p, UART_TX, xmit->buf[xmit->tail]);
 	return 0;
 err:
 	dma->tx_err = 1;
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index df6c9ab6b0cd..53af3b790129 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -359,6 +359,7 @@ 
 #define UART_OMAP_SYSC		0x15	/* System configuration register */
 #define UART_OMAP_SYSS		0x16	/* System status register */
 #define UART_OMAP_WER		0x17	/* Wake-up enable register */
+#define UART_OMAP_TX_LVL	0x1a	/* TX FIFO level register */
 
 /*
  * These are the definitions for the MDR1 register