diff mbox series

[RFC,3/6] lib/charset: Map cp437 low chars (0x01 - 0x1f) from unicode

Message ID 20240117-vidconsole-utf8-uefi-v1-3-539f7ce74fb9@jannau.net
State RFC
Delegated to: Anatolij Gustschin
Headers show
Series video: Add UTF-8 support for UEFI applications | expand

Commit Message

Janne Grunau via B4 Relay Jan. 17, 2024, 10:24 p.m. UTC
From: Janne Grunau <j@jannau.net>

Add mappings for code points 1 - 31 as those code points in code page 437
are graphics.
This fixes rendering issues of various EFI boot loaders (grub2,
sd-boot, ...) using EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.

Signed-off-by: Janne Grunau <j@jannau.net>
---
 include/charset.h                      |  2 +-
 include/cp1250.h                       | 12 ++++++++++--
 include/cp437.h                        | 12 ++++++++++--
 lib/charset.c                          |  9 ++++++---
 lib/efi_loader/efi_unicode_collation.c |  2 +-
 5 files changed, 28 insertions(+), 9 deletions(-)

Comments

Heinrich Schuchardt Jan. 18, 2024, 6:33 p.m. UTC | #1
On 1/17/24 23:24, Janne Grunau via B4 Relay wrote:
> From: Janne Grunau <j@jannau.net>
>
> Add mappings for code points 1 - 31 as those code points in code page 437
> are graphics.
> Thios fixes rendering issues of various EFI boot loaders (grub2,

%s/Thios/This/

> sd-boot, ...) using EFI_SIMPLE_TEXT_OUTPUT_PROTOCOL.

Could you please explain the reason for the change in a bit more depth?

The applications above produce UTF-16. A Truetype font should not have
symbols for positions 0x00-0x1f. Codepage 437 is only relevant for FAT
file names that might use this character set.

Why do you want to translate well-defined Unicode code points to
characters 0x00-0x1f?

Best regards

Heinrich

>
> Signed-off-by: Janne Grunau <j@jannau.net>
> ---
>   include/charset.h                      |  2 +-
>   include/cp1250.h                       | 12 ++++++++++--
>   include/cp437.h                        | 12 ++++++++++--
>   lib/charset.c                          |  9 ++++++---
>   lib/efi_loader/efi_unicode_collation.c |  2 +-
>   5 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/include/charset.h b/include/charset.h
> index 714382e1c1..c51c29235f 100644
> --- a/include/charset.h
> +++ b/include/charset.h
> @@ -16,7 +16,7 @@
>   /*
>    * codepage_437 - Unicode to codepage 437 translation table
>    */
> -extern const u16 codepage_437[128];
> +extern const u16 codepage_437[160];
>
>   /**
>    * console_read_unicode() - read Unicode code point from console
> diff --git a/include/cp1250.h b/include/cp1250.h
> index adacf8a958..b762c78d9f 100644
> --- a/include/cp1250.h
> +++ b/include/cp1250.h
> @@ -1,10 +1,18 @@
>   /* SPDX-License-Identifier: GPL-2.0+ */
>
>   /*
> - * Constant CP1250 contains the Unicode code points for characters 0x80 - 0xff
> - * of the code page 1250.
> + * Constant CP1250 contains the Unicode code points for characters 0x00 - 0x1f
> + * and 0x80 - 0xff of the code page 1250.
>    */
>   #define CP1250 { \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
> +	0x0000, 0x0000, 0x0000, 0x0000, \
>   	0x20ac, 0x0000, 0x201a, 0x0000, \
>   	0x201e, 0x2026, 0x2020, 0x2021, \
>   	0x0000, 0x2030, 0x0160, 0x2039, \
> diff --git a/include/cp437.h b/include/cp437.h
> index 0b2b97132e..5093130f5e 100644
> --- a/include/cp437.h
> +++ b/include/cp437.h
> @@ -1,10 +1,18 @@
>   /* SPDX-License-Identifier: GPL-2.0+ */
>
>   /*
> - * Constant CP437 contains the Unicode code points for characters 0x80 - 0xff
> - * of the code page 437.
> + * Constant CP437 contains the Unicode code points for characters 0x00 - 0x1f
> + * and 0x80 - 0xff of the code page 437.
>    */
>   #define CP437 { \
> +	0x0000, 0x263a, 0x263b, 0x2665, \
> +	0x2666, 0x2663, 0x2660, 0x2022, \
> +	0x25d8, 0x25cb, 0x25d9, 0x2642, \
> +	0x2640, 0x266a, 0x266b, 0x263c, \
> +	0x25ba, 0x25c4, 0x2195, 0x203c, \
> +	0x00b6, 0x00a7, 0x25ac, 0x21a8, \
> +	0x2191, 0x2193, 0x2192, 0x2190, \
> +	0x221f, 0x2194, 0x25b2, 0x25bc, \
>   	0x00c7, 0x00fc, 0x00e9, 0x00e2, \
>   	0x00e4, 0x00e0, 0x00e5, 0x00e7, \
>   	0x00ea, 0x00eb, 0x00e8, 0x00ef, \
> diff --git a/lib/charset.c b/lib/charset.c
> index 5e4c4f948a..1f8480150a 100644
> --- a/lib/charset.c
> +++ b/lib/charset.c
> @@ -16,7 +16,7 @@
>   /**
>    * codepage_437 - Unicode to codepage 437 translation table
>    */
> -const u16 codepage_437[128] = CP437;
> +const u16 codepage_437[160] = CP437;
>
>   static struct capitalization_table capitalization_table[] =
>   #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
> @@ -517,9 +517,12 @@ int utf_to_cp(s32 *c, const u16 *codepage)
>   		int j;
>
>   		/* Look up codepage translation */
> -		for (j = 0; j < 0x80; ++j) {
> +		for (j = 0; j < 0xA0; ++j) {
>   			if (*c == codepage[j]) {
> -				*c = j + 0x80;
> +				if (j < 0x20)
> +					*c = j;
> +				else
> +					*c = j + 0x60;
>   				return 0;
>   			}
>   		}
> diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c
> index c4c7572063..4b2c52918a 100644
> --- a/lib/efi_loader/efi_unicode_collation.c
> +++ b/lib/efi_loader/efi_unicode_collation.c
> @@ -257,7 +257,7 @@ static void EFIAPI efi_fat_to_str(struct efi_unicode_collation_protocol *this,
>   	for (i = 0; i < fat_size; ++i) {
>   		c = (unsigned char)fat[i];
>   		if (c > 0x80)
> -			c = codepage[c - 0x80];
> +			c = codepage[c - 0x60];
>   		string[i] = c;
>   		if (!c)
>   			break;
>
diff mbox series

Patch

diff --git a/include/charset.h b/include/charset.h
index 714382e1c1..c51c29235f 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -16,7 +16,7 @@ 
 /*
  * codepage_437 - Unicode to codepage 437 translation table
  */
-extern const u16 codepage_437[128];
+extern const u16 codepage_437[160];
 
 /**
  * console_read_unicode() - read Unicode code point from console
diff --git a/include/cp1250.h b/include/cp1250.h
index adacf8a958..b762c78d9f 100644
--- a/include/cp1250.h
+++ b/include/cp1250.h
@@ -1,10 +1,18 @@ 
 /* SPDX-License-Identifier: GPL-2.0+ */
 
 /*
- * Constant CP1250 contains the Unicode code points for characters 0x80 - 0xff
- * of the code page 1250.
+ * Constant CP1250 contains the Unicode code points for characters 0x00 - 0x1f
+ * and 0x80 - 0xff of the code page 1250.
  */
 #define CP1250 { \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
+	0x0000, 0x0000, 0x0000, 0x0000, \
 	0x20ac, 0x0000, 0x201a, 0x0000, \
 	0x201e, 0x2026, 0x2020, 0x2021, \
 	0x0000, 0x2030, 0x0160, 0x2039, \
diff --git a/include/cp437.h b/include/cp437.h
index 0b2b97132e..5093130f5e 100644
--- a/include/cp437.h
+++ b/include/cp437.h
@@ -1,10 +1,18 @@ 
 /* SPDX-License-Identifier: GPL-2.0+ */
 
 /*
- * Constant CP437 contains the Unicode code points for characters 0x80 - 0xff
- * of the code page 437.
+ * Constant CP437 contains the Unicode code points for characters 0x00 - 0x1f
+ * and 0x80 - 0xff of the code page 437.
  */
 #define CP437 { \
+	0x0000, 0x263a, 0x263b, 0x2665, \
+	0x2666, 0x2663, 0x2660, 0x2022, \
+	0x25d8, 0x25cb, 0x25d9, 0x2642, \
+	0x2640, 0x266a, 0x266b, 0x263c, \
+	0x25ba, 0x25c4, 0x2195, 0x203c, \
+	0x00b6, 0x00a7, 0x25ac, 0x21a8, \
+	0x2191, 0x2193, 0x2192, 0x2190, \
+	0x221f, 0x2194, 0x25b2, 0x25bc, \
 	0x00c7, 0x00fc, 0x00e9, 0x00e2, \
 	0x00e4, 0x00e0, 0x00e5, 0x00e7, \
 	0x00ea, 0x00eb, 0x00e8, 0x00ef, \
diff --git a/lib/charset.c b/lib/charset.c
index 5e4c4f948a..1f8480150a 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -16,7 +16,7 @@ 
 /**
  * codepage_437 - Unicode to codepage 437 translation table
  */
-const u16 codepage_437[128] = CP437;
+const u16 codepage_437[160] = CP437;
 
 static struct capitalization_table capitalization_table[] =
 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
@@ -517,9 +517,12 @@  int utf_to_cp(s32 *c, const u16 *codepage)
 		int j;
 
 		/* Look up codepage translation */
-		for (j = 0; j < 0x80; ++j) {
+		for (j = 0; j < 0xA0; ++j) {
 			if (*c == codepage[j]) {
-				*c = j + 0x80;
+				if (j < 0x20)
+					*c = j;
+				else
+					*c = j + 0x60;
 				return 0;
 			}
 		}
diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c
index c4c7572063..4b2c52918a 100644
--- a/lib/efi_loader/efi_unicode_collation.c
+++ b/lib/efi_loader/efi_unicode_collation.c
@@ -257,7 +257,7 @@  static void EFIAPI efi_fat_to_str(struct efi_unicode_collation_protocol *this,
 	for (i = 0; i < fat_size; ++i) {
 		c = (unsigned char)fat[i];
 		if (c > 0x80)
-			c = codepage[c - 0x80];
+			c = codepage[c - 0x60];
 		string[i] = c;
 		if (!c)
 			break;