diff mbox series

[U-Boot,v2,4/9] efi_loader: new function utf8_to_utf16

Message ID 20171013173314.22304-5-xypron.glpk@gmx.de
State Superseded, archived
Delegated to: Alexander Graf
Headers show
Series efi_loader: implement SetWatchdogTimer | expand

Commit Message

Heinrich Schuchardt Oct. 13, 2017, 5:33 p.m. UTC
Provide a conversion function from utf8 to utf16.

Add missing #include <linux/types.h> in include/charset.h.
Remove superfluous #include <common.h> in lib/charset.c.

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
---
v2
	new patch
---
 include/charset.h | 15 +++++++++++++++
 lib/charset.c     | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/include/charset.h b/include/charset.h
index 37a3278499..2662c2f7c9 100644
--- a/include/charset.h
+++ b/include/charset.h
@@ -9,6 +9,8 @@ 
 #ifndef __CHARSET_H_
 #define __CHARSET_H_
 
+#include <linux/types.h>
+
 #define MAX_UTF8_PER_UTF16 3
 
 /**
@@ -62,4 +64,17 @@  uint16_t *utf16_strdup(const uint16_t *s);
  */
 uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
 
+/**
+ * utf8_to_utf16() - Convert a utf8 string to utf16
+ *
+ * Converts up to 'size' bytes of the utf8 string 'src' to utf16
+ * written to the 'dest' buffer. Stops after copying a 0x00.
+ *
+ * @dest   the destination buffer to write the utf16 characters
+ * @src    the source utf8 string
+ * @size   maximum number of utf8 bytes to convert
+ * @return the pointer to the first unwritten utf16 character in 'dest'
+ */
+uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size);
+
 #endif /* __CHARSET_H_ */
diff --git a/lib/charset.c b/lib/charset.c
index ff76e88c77..8cd17ea1cb 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -6,7 +6,6 @@ 
  *  SPDX-License-Identifier:     GPL-2.0+
  */
 
-#include <common.h>
 #include <charset.h>
 #include <malloc.h>
 
@@ -99,3 +98,59 @@  uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
 
 	return dest;
 }
+
+uint16_t *utf8_to_utf16(uint16_t *dest, const uint8_t *src, size_t size)
+{
+	/*
+	 * 'size' counts bytes of 'src'; malformed input is written as '?'.
+	 */
+	while (size--) {
+		int extension_bytes = 0;
+		uint32_t min = 0;	/* smallest code of a non-overlong form */
+		uint32_t code = *src++;
+
+		if (!code) {
+			/* Exit on zero byte; the terminator is still written */
+			size = 0;
+		} else if (code >= 0xc0 && code <= 0xdf) {
+			code &= 0x1f;
+			extension_bytes = 1;
+			min = 0x80;
+		} else if (code >= 0xe0 && code <= 0xef) {
+			code &= 0x0f;
+			extension_bytes = 2;
+			min = 0x800;
+		} else if (code >= 0xf0 && code <= 0xf7) {
+			code &= 0x07;
+			extension_bytes = 3;
+			min = 0x10000;
+		} else if (code > 0x7f) {
+			/* Stray continuation byte or 0xf8..0xff: illegal */
+			code = '?';
+		}
+
+		for (; extension_bytes && size; --size, --extension_bytes) {
+			/* Keep a non-continuation byte for the next round */
+			if ((*src & 0xc0) != 0x80)
+				break;
+			code = (code << 6) | (*src++ & 0x3f);
+		}
+
+		/* Truncated, overlong, surrogate or beyond U+10FFFF */
+		if (extension_bytes || code < min || code > 0x10ffff ||
+		    (code >= 0xd800 && code <= 0xdfff))
+			code = '?';
+
+		if (code < 0x10000) {
+			*dest++ = code;
+		} else {
+			/*
+			 * Simplified expression for
+			 * (((code - 0x10000) >> 10) & 0x3ff) | 0xd800
+			 */
+			*dest++ = (code >> 10) + 0xd7c0;
+			*dest++ = (code & 0x3ff) | 0xdc00;
+		}
+	}
+	return dest;
+}