Patchwork NLS: improve UTF8 -> UTF16 string conversion routine

login
register
mail settings
Submitter John Johansen
Date Feb. 28, 2013, 6:41 p.m.
Message ID <1362076862-21644-1-git-send-email-john.johansen@canonical.com>
Download mbox | patch
Permalink /patch/224133/
State New
Headers show

Comments

John Johansen - Feb. 28, 2013, 6:41 p.m.
From: Alan Stern <stern@rowland.harvard.edu>

CVE-2013-1773

BugLink: https://launchpad.net/bugs/1134523

The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called.  A
follow-on patch will add a third caller that does utilize the new
capabilities.

The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations.  But that's a subject for a
different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

(backported from commit 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd)
Conflicts:
    drivers/hv/hv_kvp.c
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 fs/fat/namei_vfat.c |    3 ++-
 fs/nls/nls_base.c   |   43 +++++++++++++++++++++++++++++++++----------
 include/linux/nls.h |    5 +++--
 3 files changed, 38 insertions(+), 13 deletions(-)
John Johansen - Feb. 28, 2013, 6:49 p.m.
On 02/28/2013 10:41 AM, John Johansen wrote:
> From: Alan Stern <stern@rowland.harvard.edu>
> 
Oops, sorry, I messed up setting this one up; it's against Lucid.


> CVE-2013-1773
> 
> BugLink: https://launchpad.net/bugs/1134523
> 
> The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
> its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
> the maximum length of the output buffer or the endianness of its
> 16-bit output.
> 
> This patch (as1501) adds the two missing arguments, and adjusts the
> only two places in the kernel where the function is called.  A
> follow-on patch will add a third caller that does utilize the new
> capabilities.
> 
> The two conversion routines are still annoyingly inconsistent in the
> way they handle invalid byte combinations.  But that's a subject for a
> different patch.
> 
> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
> CC: Clemens Ladisch <clemens@ladisch.de>
> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
> 
> (backported from commit 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd)
> Conflicts:
>     drivers/hv/hv_kvp.c
> Signed-off-by: John Johansen <john.johansen@canonical.com>
> ---
>  fs/fat/namei_vfat.c |    3 ++-
>  fs/nls/nls_base.c   |   43 +++++++++++++++++++++++++++++++++----------
>  include/linux/nls.h |    5 +++--
>  3 files changed, 38 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
> index 72646e2..113738f 100644
> --- a/fs/fat/namei_vfat.c
> +++ b/fs/fat/namei_vfat.c
> @@ -499,7 +499,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
>  	int charlen;
>  
>  	if (utf8) {
> -		*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
> +		*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
> +				(wchar_t *) outname, FAT_LFN_LEN + 2);
>  		if (*outlen < 0)
>  			return *outlen;
>  		else if (*outlen > 255)
> diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
> index 44a88a9..0eb059e 100644
> --- a/fs/nls/nls_base.c
> +++ b/fs/nls/nls_base.c
> @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
>  }
>  EXPORT_SYMBOL(utf32_to_utf8);
>  
> -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
> +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
> +{
> +	switch (endian) {
> +	default:
> +		*s = (wchar_t) c;
> +		break;
> +	case UTF16_LITTLE_ENDIAN:
> +		*s = __cpu_to_le16(c);
> +		break;
> +	case UTF16_BIG_ENDIAN:
> +		*s = __cpu_to_be16(c);
> +		break;
> +	}
> +}
> +
> +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
> +		wchar_t *pwcs, int maxlen)
>  {
>  	u16 *op;
>  	int size;
>  	unicode_t u;
>  
>  	op = pwcs;
> -	while (*s && len > 0) {
> +	while (len > 0 && maxlen > 0 && *s) {
>  		if (*s & 0x80) {
>  			size = utf8_to_utf32(s, len, &u);
>  			if (size < 0)
>  				return -EINVAL;
> +			s += size;
> +			len -= size;
>  
>  			if (u >= PLANE_SIZE) {
> +				if (maxlen < 2)
> +					break;
>  				u -= PLANE_SIZE;
> -				*op++ = (wchar_t) (SURROGATE_PAIR |
> -						((u >> 10) & SURROGATE_BITS));
> -				*op++ = (wchar_t) (SURROGATE_PAIR |
> +				put_utf16(op++, SURROGATE_PAIR |
> +						((u >> 10) & SURROGATE_BITS),
> +						endian);
> +				put_utf16(op++, SURROGATE_PAIR |
>  						SURROGATE_LOW |
> -						(u & SURROGATE_BITS));
> +						(u & SURROGATE_BITS),
> +						endian);
> +				maxlen -= 2;
>  			} else {
> -				*op++ = (wchar_t) u;
> +				put_utf16(op++, u, endian);
> +				maxlen--;
>  			}
> -			s += size;
> -			len -= size;
>  		} else {
> -			*op++ = *s++;
> +			put_utf16(op++, *s++, endian);
>  			len--;
> +			maxlen--;
>  		}
>  	}
>  	return op - pwcs;
> diff --git a/include/linux/nls.h b/include/linux/nls.h
> index d47beef..5dc635f 100644
> --- a/include/linux/nls.h
> +++ b/include/linux/nls.h
> @@ -43,7 +43,7 @@ enum utf16_endian {
>  	UTF16_BIG_ENDIAN
>  };
>  
> -/* nls.c */
> +/* nls_base.c */
>  extern int register_nls(struct nls_table *);
>  extern int unregister_nls(struct nls_table *);
>  extern struct nls_table *load_nls(char *);
> @@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void);
>  
>  extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
>  extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
> -extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
> +extern int utf8s_to_utf16s(const u8 *s, int len,
> +		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
>  extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
>  		enum utf16_endian endian, u8 *s, int maxlen);
>  
>

Patch

diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 72646e2..113738f 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -499,7 +499,8 @@  xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
 	int charlen;
 
 	if (utf8) {
-		*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
+		*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
+				(wchar_t *) outname, FAT_LFN_LEN + 2);
 		if (*outlen < 0)
 			return *outlen;
 		else if (*outlen > 255)
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 44a88a9..0eb059e 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -114,34 +114,57 @@  int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
 }
 EXPORT_SYMBOL(utf32_to_utf8);
 
-int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
+static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
+{
+	switch (endian) {
+	default:
+		*s = (wchar_t) c;
+		break;
+	case UTF16_LITTLE_ENDIAN:
+		*s = __cpu_to_le16(c);
+		break;
+	case UTF16_BIG_ENDIAN:
+		*s = __cpu_to_be16(c);
+		break;
+	}
+}
+
+int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
+		wchar_t *pwcs, int maxlen)
 {
 	u16 *op;
 	int size;
 	unicode_t u;
 
 	op = pwcs;
-	while (*s && len > 0) {
+	while (len > 0 && maxlen > 0 && *s) {
 		if (*s & 0x80) {
 			size = utf8_to_utf32(s, len, &u);
 			if (size < 0)
 				return -EINVAL;
+			s += size;
+			len -= size;
 
 			if (u >= PLANE_SIZE) {
+				if (maxlen < 2)
+					break;
 				u -= PLANE_SIZE;
-				*op++ = (wchar_t) (SURROGATE_PAIR |
-						((u >> 10) & SURROGATE_BITS));
-				*op++ = (wchar_t) (SURROGATE_PAIR |
+				put_utf16(op++, SURROGATE_PAIR |
+						((u >> 10) & SURROGATE_BITS),
+						endian);
+				put_utf16(op++, SURROGATE_PAIR |
 						SURROGATE_LOW |
-						(u & SURROGATE_BITS));
+						(u & SURROGATE_BITS),
+						endian);
+				maxlen -= 2;
 			} else {
-				*op++ = (wchar_t) u;
+				put_utf16(op++, u, endian);
+				maxlen--;
 			}
-			s += size;
-			len -= size;
 		} else {
-			*op++ = *s++;
+			put_utf16(op++, *s++, endian);
 			len--;
+			maxlen--;
 		}
 	}
 	return op - pwcs;
diff --git a/include/linux/nls.h b/include/linux/nls.h
index d47beef..5dc635f 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -43,7 +43,7 @@  enum utf16_endian {
 	UTF16_BIG_ENDIAN
 };
 
-/* nls.c */
+/* nls_base.c */
 extern int register_nls(struct nls_table *);
 extern int unregister_nls(struct nls_table *);
 extern struct nls_table *load_nls(char *);
@@ -52,7 +52,8 @@  extern struct nls_table *load_nls_default(void);
 
 extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
-extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
+extern int utf8s_to_utf16s(const u8 *s, int len,
+		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);