diff mbox series

[U-Boot,05/15] lib: vsprintf: correct printing of Unicode strings

Message ID 20180811152820.26817-6-xypron.glpk@gmx.de
State Superseded, archived
Delegated to: Alexander Graf
Headers show
Series efi_loader: EFI_UNICODE_COLLATION_PROTOCOL | expand

Commit Message

Heinrich Schuchardt Aug. 11, 2018, 3:28 p.m. UTC
The width and precision of the printf() function refer to the number of
characters, not to the number of bytes printed.

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
---
 lib/vsprintf.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

Comments

Alexander Graf Aug. 26, 2018, 6:05 p.m. UTC | #1
On 11.08.18 17:28, Heinrich Schuchardt wrote:
> The width and precision of the printf() function refer to the number of
> characters not to the number of bytes printed.
> 
> Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
> ---
>  lib/vsprintf.c | 22 +++++++++++++---------
>  1 file changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> index a07128ad96..b7eb9d5f5e 100644
> --- a/lib/vsprintf.c
> +++ b/lib/vsprintf.c
> @@ -280,18 +280,22 @@ static char *string16(char *buf, char *end, u16 *s, int field_width,
>  		int precision, int flags)
>  {
>  	u16 *str = s ? s : L"<NULL>";
> -	int utf16_len = u16_strnlen(str, precision);
> -	u8 utf8[utf16_len * MAX_UTF8_PER_UTF16];
> -	int utf8_len, i;
> -
> -	utf8_len = utf16_to_utf8(utf8, str, utf16_len) - utf8;
> +	ssize_t i, len = utf16_strnlen(str, precision);
>  
>  	if (!(flags & LEFT))
> -		while (utf8_len < field_width--)
> +		for (; len < field_width; --field_width)
>  			ADDCH(buf, ' ');
> -	for (i = 0; i < utf8_len; ++i)
> -		ADDCH(buf, utf8[i]);
> -	while (utf8_len < field_width--)
> +	for (i = 0; i < len; ++i) {
> +		s32 code = utf16_get((const u16 **)&str);
> +
> +		if (code < 0) {
> +			code = '?';
> +			if (*str)
> +				++str;
> +		}
> +		utf8_put(code, &buf);

Can you introduce or reuse a strcpy() helper in charset.c for this? That
way the compiler has the chance to inline utf16_get() and utf8_put() and
make the function fast.


Alex
Heinrich Schuchardt Aug. 26, 2018, 6:34 p.m. UTC | #2
On 08/26/2018 08:05 PM, Alexander Graf wrote:
> 
> 
> On 11.08.18 17:28, Heinrich Schuchardt wrote:
>> The width and precision of the printf() function refer to the number of
>> characters not to the number of bytes printed.
>>
>> Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
>> ---
>>  lib/vsprintf.c | 22 +++++++++++++---------
>>  1 file changed, 13 insertions(+), 9 deletions(-)
>>
>> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
>> index a07128ad96..b7eb9d5f5e 100644
>> --- a/lib/vsprintf.c
>> +++ b/lib/vsprintf.c
>> @@ -280,18 +280,22 @@ static char *string16(char *buf, char *end, u16 *s, int field_width,
>>  		int precision, int flags)
>>  {
>>  	u16 *str = s ? s : L"<NULL>";
>> -	int utf16_len = u16_strnlen(str, precision);
>> -	u8 utf8[utf16_len * MAX_UTF8_PER_UTF16];
>> -	int utf8_len, i;
>> -
>> -	utf8_len = utf16_to_utf8(utf8, str, utf16_len) - utf8;
>> +	ssize_t i, len = utf16_strnlen(str, precision);
>>  
>>  	if (!(flags & LEFT))
>> -		while (utf8_len < field_width--)
>> +		for (; len < field_width; --field_width)
>>  			ADDCH(buf, ' ');
>> -	for (i = 0; i < utf8_len; ++i)
>> -		ADDCH(buf, utf8[i]);
>> -	while (utf8_len < field_width--)
>> +	for (i = 0; i < len; ++i) {
>> +		s32 code = utf16_get((const u16 **)&str);
>> +
>> +		if (code < 0) {
>> +			code = '?';
>> +			if (*str)
>> +				++str;
>> +		}
>> +		utf8_put(code, &buf);
> 
> Can you introduce or reuse a strcpy() helper in charset.c for this? That
> way the compiler has the chance to inline utf16_get() and utf8_put() and
> make the function fast.

strcpy() works on bytes not on multi-byte utf-8 characters. So it is
unclear to me how I should make use of strcpy() here.

Of course we could define utf8_put() and utf8_get() as inline functions.
But that would increase code size. Is this what you would prefer? I
would guess that the serial interface is always the slowest part of text
output anyway.

Regards

Heinrich

> 
> 
> Alex
>
Alexander Graf Aug. 26, 2018, 10:01 p.m. UTC | #3
On 26.08.18 20:34, Heinrich Schuchardt wrote:
> On 08/26/2018 08:05 PM, Alexander Graf wrote:
>>
>>
>> On 11.08.18 17:28, Heinrich Schuchardt wrote:
>>> The width and precision of the printf() function refer to the number of
>>> characters not to the number of bytes printed.
>>>
>>> Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
>>> ---
>>>  lib/vsprintf.c | 22 +++++++++++++---------
>>>  1 file changed, 13 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
>>> index a07128ad96..b7eb9d5f5e 100644
>>> --- a/lib/vsprintf.c
>>> +++ b/lib/vsprintf.c
>>> @@ -280,18 +280,22 @@ static char *string16(char *buf, char *end, u16 *s, int field_width,
>>>  		int precision, int flags)
>>>  {
>>>  	u16 *str = s ? s : L"<NULL>";
>>> -	int utf16_len = u16_strnlen(str, precision);
>>> -	u8 utf8[utf16_len * MAX_UTF8_PER_UTF16];
>>> -	int utf8_len, i;
>>> -
>>> -	utf8_len = utf16_to_utf8(utf8, str, utf16_len) - utf8;
>>> +	ssize_t i, len = utf16_strnlen(str, precision);
>>>  
>>>  	if (!(flags & LEFT))
>>> -		while (utf8_len < field_width--)
>>> +		for (; len < field_width; --field_width)
>>>  			ADDCH(buf, ' ');
>>> -	for (i = 0; i < utf8_len; ++i)
>>> -		ADDCH(buf, utf8[i]);
>>> -	while (utf8_len < field_width--)
>>> +	for (i = 0; i < len; ++i) {
>>> +		s32 code = utf16_get((const u16 **)&str);
>>> +
>>> +		if (code < 0) {
>>> +			code = '?';
>>> +			if (*str)
>>> +				++str;
>>> +		}
>>> +		utf8_put(code, &buf);
>>
>> Can you introduce or reuse a strcpy() helper in charset.c for this? That
>> way the compiler has the chance to inline utf16_get() and utf8_put() and
>> make the function fast.
> 
> strcpy() works on bytes not on multi-byte utf-8 characters. So it is
> unclear to me how I should make use of strcpy() here.

What I was trying to imply is that what you're doing here is very
similar to utf8_utf16_strncpy(). Maybe we can reuse the same function or
at least something very similar.

> Of course we could define utf8_put() and utf8_get() as inline functions.
> But that would increase code size. Is this what you would prefer? I
> would guess that the serial interface is always the slowest part of text
> output anyway.

Real serial output is definitely orders of magnitude slower, I agree.
But if we can make the code easier to read along the way I'm all for it ;).

I think what it boils down to is that I'd prefer if we keep
utf{8,16}_{get,put}() as local to charset.c as we can and instead put
slightly higher level wrappers around them, like you did for pretty much
everything else.


Alex
diff mbox series

Patch

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index a07128ad96..b7eb9d5f5e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -280,18 +280,22 @@  static char *string16(char *buf, char *end, u16 *s, int field_width,
 		int precision, int flags)
 {
 	u16 *str = s ? s : L"<NULL>";
-	int utf16_len = u16_strnlen(str, precision);
-	u8 utf8[utf16_len * MAX_UTF8_PER_UTF16];
-	int utf8_len, i;
-
-	utf8_len = utf16_to_utf8(utf8, str, utf16_len) - utf8;
+	ssize_t i, len = utf16_strnlen(str, precision);
 
 	if (!(flags & LEFT))
-		while (utf8_len < field_width--)
+		for (; len < field_width; --field_width)
 			ADDCH(buf, ' ');
-	for (i = 0; i < utf8_len; ++i)
-		ADDCH(buf, utf8[i]);
-	while (utf8_len < field_width--)
+	for (i = 0; i < len; ++i) {
+		s32 code = utf16_get((const u16 **)&str);
+
+		if (code < 0) {
+			code = '?';
+			if (*str)
+				++str;
+		}
+		utf8_put(code, &buf);
+	}
+	for (; i < field_width; --field_width)
 		ADDCH(buf, ' ');
 	return buf;
 }