diff mbox series

[2/6] efi_loader: carve out utf_to_cp()

Message ID 20210227130840.166193-3-xypron.glpk@gmx.de
State Accepted, archived
Commit 73bb90cabcdffcd528d1002a12779779196bf200
Delegated to: Heinrich Schuchardt
Headers show
Series efi_loader: Unicode output in UEFI applications | expand

Commit Message

Heinrich Schuchardt Feb. 27, 2021, 1:08 p.m. UTC
Carve out a function to translate a Unicode code point to an 8-bit codepage.

Provide a unit test for the new function.

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
---
 include/charset.h                      | 11 ++++++++++
 lib/charset.c                          | 28 +++++++++++++++++++++++++
 lib/efi_loader/efi_unicode_collation.c | 19 +++--------------
 test/unicode_ut.c                      | 29 ++++++++++++++++++++++++++
 4 files changed, 71 insertions(+), 16 deletions(-)

--
2.30.0
diff mbox series

Patch

diff --git a/include/charset.h b/include/charset.h
index 64ba91f791..52e7d1474e 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -275,4 +275,15 @@  u16 *u16_strdup(const void *src);
  */
 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);

+/**
+ * utf_to_cp() - translate Unicode code point to 8-bit codepage
+ *
+ * Codepoints that do not exist in the codepage are rendered as a question mark.
+ *
+ * @c:		pointer to Unicode code point to be translated
+ * @codepage:	Unicode to codepage translation table
+ * Return:	0 on success, -ENOENT if codepoint cannot be translated
+ */
+int utf_to_cp(s32 *c, const u16 *codepage);
+
 #endif /* __CHARSET_H_ */
diff --git a/lib/charset.c b/lib/charset.c
index 814847d165..1345c8f9f0 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -10,6 +10,7 @@ 
 #include <capitalization.h>
 #include <cp437.h>
 #include <efi_loader.h>
+#include <errno.h>
 #include <malloc.h>

 /**
@@ -472,3 +473,30 @@  uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)

 	return dest;
 }
+
+/**
+ * utf_to_cp() - translate Unicode code point to 8-bit codepage
+ *
+ * Codepoints that do not exist in the codepage are rendered as a question mark.
+ *
+ * @c:		pointer to Unicode code point to be translated
+ * @codepage:	Unicode to codepage translation table
+ * Return:	0 on success, -ENOENT if codepoint cannot be translated
+ */
+int utf_to_cp(s32 *c, const u16 *codepage)
+{
+	if (*c >= 0x80) {
+		int j;
+
+		/* Look up codepage translation */
+		for (j = 0; j < 0x80; ++j) {
+			if (*c == codepage[j]) {
+				*c = j + 0x80;
+				return 0;
+			}
+		}
+		*c = '?';
+		return -ENOENT;
+	}
+	return 0;
+}
diff --git a/lib/efi_loader/efi_unicode_collation.c b/lib/efi_loader/efi_unicode_collation.c
index bf5314c4ff..36be798f64 100644
--- a/lib/efi_loader/efi_unicode_collation.c
+++ b/lib/efi_loader/efi_unicode_collation.c
@@ -300,23 +300,10 @@  static bool EFIAPI efi_str_to_fat(struct efi_unicode_collation_protocol *this,
 			break;
 		}
 		c = utf_to_upper(c);
-		if (c >= 0x80) {
-			int j;
-
-			/* Look for codepage translation */
-			for (j = 0; j < 0x80; ++j) {
-				if (c == codepage[j]) {
-					c = j + 0x80;
-					break;
-				}
-			}
-			if (j >= 0x80) {
-				c = '_';
-				ret = true;
-			}
-		} else if (c && (c < 0x20 || strchr(illegal, c))) {
-			c = '_';
+		if (utf_to_cp(&c, codepage) ||
+		    (c && (c < 0x20 || strchr(illegal, c)))) {
 			ret = true;
+			c = '_';
 		}

 		fat[i] = c;
diff --git a/test/unicode_ut.c b/test/unicode_ut.c
index 6130ef0b54..2cc6b5feff 100644
--- a/test/unicode_ut.c
+++ b/test/unicode_ut.c
@@ -595,6 +595,35 @@  static int unicode_test_u16_strsize(struct unit_test_state *uts)
 }
 UNICODE_TEST(unicode_test_u16_strsize);

+static int unicode_test_utf_to_cp(struct unit_test_state *uts)
+{
+	int ret;
+	s32 c;
+
+	c = '\n';
+	ret = utf_to_cp(&c, codepage_437);
+	ut_asserteq(0, ret);
+	ut_asserteq('\n', c);
+
+	c = 'a';
+	ret = utf_to_cp(&c, codepage_437);
+	ut_asserteq(0, ret);
+	ut_asserteq('a', c);
+
+	c = 0x03c4; /* Greek small letter tau */
+	ret = utf_to_cp(&c, codepage_437);
+	ut_asserteq(0, ret);
+	ut_asserteq(0xe7, c);
+
+	c = 0x03a4; /* Greek capital letter tau */
+	ret = utf_to_cp(&c, codepage_437);
+	ut_asserteq(-ENOENT, ret);
+	ut_asserteq('?', c);
+
+	return 0;
+}
+UNICODE_TEST(unicode_test_utf_to_cp);
+
 #ifdef CONFIG_EFI_LOADER
 static int unicode_test_efi_create_indexed_name(struct unit_test_state *uts)
 {