diff mbox

[v3] printk: Have printk() never buffer its data

Message ID 22482.1341471787@neuling.org (mailing list archive)
State Not Applicable
Headers show

Commit Message

Michael Neuling July 5, 2012, 7:03 a.m. UTC
> On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:
> > On Mon, Jun 25, 2012 at 5:56 PM, Kay Sievers <kay@vrfy.org> wrote:
> > >
> > > Buffering has nice effects though:
> > > It makes continuation lines appear as one record in the buffer, not as
> > > n individual prints with n headers.
> > 
> > As I already mentioned, buffering for *logging* is different from
> > buffering for *printing*.
> > 
> > I think it might be a great idea to buffer for logging in order to
> > generate one individual buffer record there.
> > 
> > But it needs to be printed as it is generated.
> 
> That's a good idea.
> 
> Something like this could work - only minimally tested at this moment.

This breaks some powerpc configs and is in Linus' tree now as
084681d14e.

When we have printks without a newline (like show_regs()), it
sometimes:
1) drops the console output for that line (dmesg is fine).  Patch to fix
   this below.
2) adds a newline unnecessary to both console and dmesg.  I have no fix
   for this currently.
Reverting this patch fixes both problems.

Post rc4 is way to late to be adding largely untested features like
this.  

I suggest we revert this until the next merge window.

Mikey




> 
> 
> From: Kay Sievers <kay@vrfy.org>
> Subject: printk: flush continuation lines immediately to console
> 
> Continuation lines are buffered internally to merge the chunked
> printk()s into a single record, and to isolate potentially racy
> continuation users from usual terminated line users.
> 
> This though, has the effect that partial lines are not printed to
> the console in the moment they are emitted. In case the kernel
> crashes in the meantime, the potentially interesting printed
> information will never reach the consoles.
> 
> Here we share the continuation buffer with the console copy logic,
> and partial lines are immediately flushed to the consoles, but still
> buffered internally to improve the integrity of the messages.
> ---
> 
>  kernel/printk.c |  129 +++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 101 insertions(+), 28 deletions(-)
> 
> diff --git a/kernel/printk.c b/kernel/printk.c
> index a2276b9..92c34bd1 100644
> --- a/kernel/printk.c
> +++ b/kernel/printk.c
> @@ -193,12 +193,19 @@ static int console_may_schedule;
>   * separated by ',', and find the message after the ';' character.
>   */
>  
> +enum log_flags {
> +	LOG_DEFAULT = 0,
> +	LOG_NOCONS = 1,		/* already flushed, do not print to console */
> +};
> +
>  struct log {
>  	u64 ts_nsec;		/* timestamp in nanoseconds */
>  	u16 len;		/* length of entire record */
>  	u16 text_len;		/* length of text buffer */
>  	u16 dict_len;		/* length of dictionary buffer */
> -	u16 level;		/* syslog level + facility */
> +	u8 facility;		/* syslog facility */
> +	u8 flags:5;		/* internal record flags */
> +	u8 level:3;		/* syslog level */
>  };
>  
>  /*
> @@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
>  
>  /* insert record into the buffer, discard old ones, update heads */
>  static void log_store(int facility, int level,
> +		      enum log_flags flags, u64 ts_nsec,
>  		      const char *dict, u16 dict_len,
>  		      const char *text, u16 text_len)
>  {
> @@ -329,8 +337,13 @@ static void log_store(int facility, int level,
>  	msg->text_len = text_len;
>  	memcpy(log_dict(msg), dict, dict_len);
>  	msg->dict_len = dict_len;
> -	msg->level = (facility << 3) | (level & 7);
> -	msg->ts_nsec = local_clock();
> +	msg->facility = facility;
> +	msg->level = level & 7;
> +	msg->flags = flags & 0x1f;
> +	if (ts_nsec > 0)
> +		msg->ts_nsec = ts_nsec;
> +	else
> +		msg->ts_nsec = local_clock();
>  	memset(log_dict(msg) + dict_len, 0, pad_len);
>  	msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
>  
> @@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
>  	ts_usec = msg->ts_nsec;
>  	do_div(ts_usec, 1000);
>  	len = sprintf(user->buf, "%u,%llu,%llu;",
> -		      msg->level, user->seq, ts_usec);
> +		      (msg->facility << 3) | msg->level, user->seq, ts_usec);
>  
>  	/* escape non-printable characters */
>  	for (i = 0; i < msg->text_len; i++) {
> @@ -787,6 +800,21 @@ static bool printk_time;
>  #endif
>  module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
>  
> +static size_t print_time(u64 ts, char *buf)
> +{
> +	unsigned long rem_nsec;
> +
> +	if (!printk_time)
> +		return 0;
> +
> +	if (!buf)
> +		return 15;
> +
> +	rem_nsec = do_div(ts, 1000000000);
> +	return sprintf(buf, "[%5lu.%06lu] ",
> +		       (unsigned long)ts, rem_nsec / 1000);
> +}
> +
>  static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
>  {
>  	size_t len = 0;
> @@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
>  		}
>  	}
>  
> -	if (printk_time) {
> -		if (buf) {
> -			unsigned long long ts = msg->ts_nsec;
> -			unsigned long rem_nsec = do_div(ts, 1000000000);
> -
> -			len += sprintf(buf + len, "[%5lu.%06lu] ",
> -					 (unsigned long) ts, rem_nsec / 1000);
> -		} else {
> -			len += 15;
> -		}
> -	}
> -
> +	len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
>  	return len;
>  }
>  
> @@ -1272,15 +1289,25 @@ static inline void printk_delay(void)
>  	}
>  }
>  
> +/*
> + * Continuation lines are buffered, and not committed to the record buffer
> + * until the line is complete, or a race forces a flush. The line fragments
> + * though, are printed immediately to the consoles to ensure everything has
> + * reached the console in case of a kernel crash.
> + */
> +static char cont_buf[LOG_LINE_MAX];
> +static size_t cont_len;
> +static size_t cont_cons;
> +static size_t cont_cons_len;
> +static int cont_level;
> +static u64 cont_ts_nsec;
> +static struct task_struct *cont_task;
> +
>  asmlinkage int vprintk_emit(int facility, int level,
>  			    const char *dict, size_t dictlen,
>  			    const char *fmt, va_list args)
>  {
>  	static int recursion_bug;
> -	static char cont_buf[LOG_LINE_MAX];
> -	static size_t cont_len;
> -	static int cont_level;
> -	static struct task_struct *cont_task;
>  	static char textbuf[LOG_LINE_MAX];
>  	char *text = textbuf;
>  	size_t text_len;
> @@ -1326,7 +1353,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		recursion_bug = 0;
>  		printed_len += strlen(recursion_msg);
>  		/* emit KERN_CRIT message */
> -		log_store(0, 2, NULL, 0, recursion_msg, printed_len);
> +		log_store(0, 2, LOG_DEFAULT, 0,
> +			  NULL, 0, recursion_msg, printed_len);
>  	}
>  
>  	/*
> @@ -1369,13 +1397,17 @@ asmlinkage int vprintk_emit(int facility, int level,
>  			 * Flush earlier buffer, which is either from a
>  			 * different thread, or when we got a new prefix.
>  			 */
> -			log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
> +			log_store(facility, cont_level,
> +				  LOG_NOCONS, cont_ts_nsec,
> +				  NULL, 0, cont_buf, cont_len);
>  			cont_len = 0;
>  		}
>  
>  		if (!cont_len) {
> +			cont_cons = 0;
>  			cont_level = level;
>  			cont_task = current;
> +			cont_ts_nsec = local_clock();
>  		}
>  
>  		/* buffer or append to earlier buffer from the same thread */
> @@ -1383,6 +1415,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>  			text_len = sizeof(cont_buf) - cont_len;
>  		memcpy(cont_buf + cont_len, text, text_len);
>  		cont_len += text_len;
> +		cont_cons_len = cont_len;
> +		printed_len = text_len;
>  	} else {
>  		if (cont_len && cont_task == current) {
>  			if (prefix) {
> @@ -1392,6 +1426,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  				 * with an interrupt.
>  				 */
>  				log_store(facility, cont_level,
> +					  LOG_NOCONS, cont_ts_nsec,
>  					  NULL, 0, cont_buf, cont_len);
>  				cont_len = 0;
>  			}
> @@ -1401,14 +1436,15 @@ asmlinkage int vprintk_emit(int facility, int level,
>  				text_len = sizeof(cont_buf) - cont_len;
>  			memcpy(cont_buf + cont_len, text, text_len);
>  			cont_len += text_len;
> +			cont_cons_len = cont_len;
>  			log_store(facility, cont_level,
> +				  LOG_NOCONS, cont_ts_nsec,
>  				  NULL, 0, cont_buf, cont_len);
>  			cont_len = 0;
> -			cont_task = NULL;
> -			printed_len = cont_len;
> +			printed_len = text_len;
>  		} else {
>  			/* ordinary single and terminated line */
> -			log_store(facility, level,
> +			log_store(facility, level, LOG_DEFAULT, 0,
>  				  dict, dictlen, text, text_len);
>  			printed_len = text_len;
>  		}
> @@ -1795,6 +1831,7 @@ static u32 console_idx;
>   */
>  void console_unlock(void)
>  {
> +	static char text[LOG_LINE_MAX];
>  	static u64 seen_seq;
>  	unsigned long flags;
>  	bool wake_klogd = false;
> @@ -1807,10 +1844,37 @@ void console_unlock(void)
>  
>  	console_may_schedule = 0;
>  
> +	/* flush buffered message fragment immediately to console */
> +	raw_spin_lock_irqsave(&logbuf_lock, flags);
> +	if (cont_cons_len && (cont_cons < cont_cons_len || !cont_len)) {
> +		size_t textlen = 0;
> +		size_t len;
> +
> +		len = cont_cons_len - cont_cons;
> +		if (!cont_cons)
> +			textlen = print_time(cont_ts_nsec, text);
> +		memcpy(text + textlen, cont_buf + cont_cons, len);
> +		textlen += len;
> +		cont_cons = cont_cons_len;
> +
> +		if (!cont_len) {
> +			/* last chunk of line; terminate */
> +			text[textlen++] = '\n';
> +			cont_cons_len = 0;
> +		}
> +		raw_spin_unlock(&logbuf_lock);
> +
> +		stop_critical_timings();
> +		call_console_drivers(cont_level, text, textlen);
> +		start_critical_timings();
> +
> +		local_irq_restore(flags);
> +	} else
> +		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> +
>  again:
>  	for (;;) {
>  		struct log *msg;
> -		static char text[LOG_LINE_MAX];
>  		size_t len;
>  		int level;
>  
> @@ -1825,13 +1889,22 @@ again:
>  			console_seq = log_first_seq;
>  			console_idx = log_first_idx;
>  		}
> -
> +skip:
>  		if (console_seq == log_next_seq)
>  			break;
>  
>  		msg = log_from_idx(console_idx);
> -		level = msg->level & 7;
> +		if (msg->flags & LOG_NOCONS) {
> +			/*
> +			 * Skip record we have buffered and already printed
> +			 * directly to the console when we received it.
> +			 */
> +			console_idx = log_next(console_idx);
> +			console_seq++;
> +			goto skip;
> +		}
>  
> +		level = msg->level;
>  		len = msg_print_text(msg, false, text, sizeof(text));
>  
>  		console_idx = log_next(console_idx);
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

Comments

Kay Sievers July 5, 2012, 8:39 a.m. UTC | #1
On Thu, Jul 5, 2012 at 9:03 AM, Michael Neuling <mikey@neuling.org> wrote:
>> On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:

>> > I think it might be a great idea to buffer for logging in order to
>> > generate one individual buffer record there.
>> >
>> > But it needs to be printed as it is generated.
>>
>> That's a good idea.
>>
>> Something like this could work - only minimally tested at this moment.
>
> This breaks some powerpc configs and is in Linus' tree now as
> 084681d14e.
>
> When we have printks without a newline (like show_regs()), it
> sometimes:

x86 has that a lot too.

> 1) drops the console output for that line (dmesg is fine).  Patch to fix
>    this below.

That doesn't look right. We should already have put that out to the
console, and we only want to store it away. Your patch, as expected,
duplicates all the continuation lines on the console here:
[    0.674957] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
[    0.674957] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0

> 2) adds a newline unnecessary to both console and dmesg.  I have no fix
>    for this currently.
> Reverting this patch fixes both problems.

Not here. I can't reproduce any of this here, it all looks fine.

Is that possibly some early printk() or other console trickery on ppc
that produces the issue?

Thanks,
Kay
Kay Sievers July 5, 2012, 8:53 a.m. UTC | #2
On Thu, Jul 5, 2012 at 10:39 AM, Kay Sievers <kay@vrfy.org> wrote:
> On Thu, Jul 5, 2012 at 9:03 AM, Michael Neuling <mikey@neuling.org> wrote:
>>> On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:
>
>>> > I think it might be a great idea to buffer for logging in order to
>>> > generate one individual buffer record there.
>>> >
>>> > But it needs to be printed as it is generated.
>>>
>>> That's a good idea.
>>>
>>> Something like this could work - only minimally tested at this moment.
>>
>> This breaks some powerpc configs and is in Linus' tree now as
>> 084681d14e.
>>
>> When we have printks without a newline (like show_regs()), it
>> sometimes:
>
> x86 has that a lot too.
>
>> 1) drops the console output for that line (dmesg is fine).  Patch to fix
>>    this below.
>
> That doesn't look right. We should already have put that out to the
> console, and we only want to store it away. Your patch, as expected,
> duplicates all the continuation lines on the console here:
> [    0.674957] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
> [    0.674957] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
>
>> 2) adds a newline unnecessary to both console and dmesg.  I have no fix
>>    for this currently.
>> Reverting this patch fixes both problems.
>
> Not here. I can't reproduce any of this here, it all looks fine.
>
> Is that possibly some early printk() or other console trickery on ppc
> that produces the issue?

Or, are you really sure this isn't just a race with another printk and
the content got merged with some other line of the output? The added
newline you mention could suggest that.

With the buffering for the console output removed, I see garbled
console output here too (like we always had before the new kmsg) when
continuation printks race against printks from any other process. That
was all gone with the buffering (only two continuation prints would
race, not any other), but we needed to skip buffering the console
output for other, more important reasons.

Kay
Michael Neuling July 5, 2012, 10:20 a.m. UTC | #3
Kay Sievers <kay@vrfy.org> wrote:

> On Thu, Jul 5, 2012 at 10:39 AM, Kay Sievers <kay@vrfy.org> wrote:
> > On Thu, Jul 5, 2012 at 9:03 AM, Michael Neuling <mikey@neuling.org> wrote:
> >>> On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:
> >
> >>> > I think it might be a great idea to buffer for logging in order to
> >>> > generate one individual buffer record there.
> >>> >
> >>> > But it needs to be printed as it is generated.
> >>>
> >>> That's a good idea.
> >>>
> >>> Something like this could work - only minimally tested at this moment.
> >>
> >> This breaks some powerpc configs and is in Linus' tree now as
> >> 084681d14e.
> >>
> >> When we have printks without a newline (like show_regs()), it
> >> sometimes:
> >
> > x86 has that a lot too.
> >
> >> 1) drops the console output for that line (dmesg is fine).  Patch to fix
> >>    this below.
> >
> > That doesn't look right. We should already have put that out to the
> > console, and we only want to store it away. Your patch, as expected,
> > duplicates all the continuation lines on the console here:
> > [    0.674957] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
> > [    0.674957] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
> >
> >> 2) adds a newline unnecessary to both console and dmesg.  I have no fix
> >>    for this currently.
> >> Reverting this patch fixes both problems.
> >
> > Not here. I can't reproduce any of this here, it all looks fine.
> >
> > Is that possibly some early printk() or other console trickery on ppc
> > that produces the issue?
> 
> Or, are you really sure this isn't just a race with another printk and
> the content got merged with some other line of the output? The added
> newline you mention could suggest that.

I can make 1) happen on UP or SMP.

I can only make 2) happen on SMP.  It's when the second CPU is coming up
and it's printing something.  The first CPU isn't printing anything at
this stage (there is no garbled console here) so I don't think it's a
race.  I see it consistently in show_regs().  Every printk without a
newline.  ie I get this:
---
NIP: c000000000048164 LR: c000000000048160 CTR: 0000000000000000
REGS: c00000007e59fb50 TRAP: 0700   Tainted: G        W     (3.5.0-rc5-mikey)
MSR: 9000000000021032 
<
SF
,HV
,ME
,IR
,DR
,RI
>
  CR: 28000042  XER: 22000000
SOFTE: 0
CFAR: c0000000007402f8
TASK = c00000007e56bb40[0] 'swapper/1' THREAD: c00000007e59c000
 CPU: 1
---

It's consistent for printks without newlines in show_regs().  MSR
through to XER should all be on the same line.

Mikey

> With the buffering for the console output removed, I see garbled
> console output here too (like we always had before the new kmsg) when
> continuation printks race against printks from any other process. That
> was all gone with the buffering (only two continuation prints would
> race, not any other), but we needed to skip buffering the console
> output for other, more important reasons.
Kay Sievers July 5, 2012, 11:47 a.m. UTC | #4
On Thu, Jul 5, 2012 at 12:20 PM, Michael Neuling <mikey@neuling.org> wrote:

> I can only make 2) happen on SMP.  It's when the second CPU is coming up
> and it's printing something.  The first CPU isn't printing anything at
> this stage (there is no garbled console here) so I don't think it's a
> race.  I see it consistently in show_regs().  Every printk without a
> newline.  ie I get this:
> ---
> NIP: c000000000048164 LR: c000000000048160 CTR: 0000000000000000
> REGS: c00000007e59fb50 TRAP: 0700   Tainted: G        W     (3.5.0-rc5-mikey)
> MSR: 9000000000021032
> <
> SF
> ,HV
> ,ME
> ,IR
> ,DR
> ,RI
>>
>   CR: 28000042  XER: 22000000
> SOFTE: 0
> CFAR: c0000000007402f8
> TASK = c00000007e56bb40[0] 'swapper/1' THREAD: c00000007e59c000
>  CPU: 1
> ---
>
> It's consistent for printks without newlines in show_regs().  MSR
> through to XER should all be on the same line.

I see. Something to fix then, I'll have a look.

Does this happen only very early during bootup, or also later when the
box fully initialized?

The output of 'dmesg' later looks always correct, right?

Thanks,
Kay
diff mbox

Patch

diff --git a/kernel/printk.c b/kernel/printk.c
index dba1821..91659da 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1335,7 +1335,7 @@  static void cont_flush(void)
        if (cont.len == 0)
                return;
 
-       log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
+       log_store(cont.facility, cont.level, LOG_DEFAULT, cont.ts_nsec,
                  NULL, 0, cont.buf, cont.len);
 
        cont.flushed = true;