diff mbox series

[RFC,12/20] hfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option

Message ID 20210808162453.1653-13-pali@kernel.org
State New
Headers show
Series fs: Remove usage of broken nls_utf8 and drop it | expand

Commit Message

Pali Rohár Aug. 8, 2021, 4:24 p.m. UTC
NLS table for utf8 is broken and cannot be fixed.

So instead of broken utf8 nls functions char2uni() and uni2char() use
functions utf8_to_utf32() and utf32_to_utf8() which implements correct
encoding and decoding between Unicode code points and UTF-8 sequence.

When iochatset=utf8 is used then set hsb->nls_io to NULL and use it for
distinguish between the fact if NLS table or native UTF-8 functions should
be used.

Signed-off-by: Pali Rohár <pali@kernel.org>
---
 fs/hfs/super.c | 33 ++++++++++++++++++++++-----------
 fs/hfs/trans.c | 24 ++++++++++++++++++++----
 2 files changed, 42 insertions(+), 15 deletions(-)

Comments

Viacheslav Dubeyko Aug. 9, 2021, 5:49 p.m. UTC | #1
> On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
> 
> NLS table for utf8 is broken and cannot be fixed.
> 
> So instead of broken utf8 nls functions char2uni() and uni2char() use
> functions utf8_to_utf32() and utf32_to_utf8() which implements correct
> encoding and decoding between Unicode code points and UTF-8 sequence.
> 
> When iochatset=utf8 is used then set hsb->nls_io to NULL and use it for
> distinguish between the fact if NLS table or native UTF-8 functions should
> be used.
> 
> Signed-off-by: Pali Rohár <pali@kernel.org>
> ---
> fs/hfs/super.c | 33 ++++++++++++++++++++++-----------
> fs/hfs/trans.c | 24 ++++++++++++++++++++----
> 2 files changed, 42 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/hfs/super.c b/fs/hfs/super.c
> index 86bc46746c7f..076308df41cf 100644
> --- a/fs/hfs/super.c
> +++ b/fs/hfs/super.c
> @@ -149,10 +149,13 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root)
> 		seq_printf(seq, ",part=%u", sbi->part);
> 	if (sbi->session >= 0)
> 		seq_printf(seq, ",session=%u", sbi->session);
> -	if (sbi->nls_disk)
> +	if (sbi->nls_disk) {
> 		seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset);

Maybe, I am missing something. But where is the closing “}”?


> -	if (sbi->nls_io)
> -		seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
> +		if (sbi->nls_io)
> +			seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
> +		else
> +			seq_puts(seq, ",iocharset=utf8");
> +	}
> 	if (sbi->s_quiet)
> 		seq_printf(seq, ",quiet");
> 	return 0;
> @@ -225,6 +228,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> 	char *p;
> 	substring_t args[MAX_OPT_ARGS];
> 	int tmp, token;
> +	int have_iocharset;

What’s about boolean type?

> 
> 	/* initialize the sb with defaults */
> 	hsb->s_uid = current_uid();
> @@ -239,6 +243,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> 	if (!options)
> 		return 1;
> 
> +	have_iocharset = 0;

What’s about false here?

> +
> 	while ((p = strsep(&options, ",")) != NULL) {
> 		if (!*p)
> 			continue;
> @@ -332,18 +338,22 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> 			kfree(p);
> 			break;
> 		case opt_iocharset:
> -			if (hsb->nls_io) {
> +			if (have_iocharset) {
> 				pr_err("unable to change iocharset\n");
> 				return 0;
> 			}
> 			p = match_strdup(&args[0]);
> -			if (p)
> -				hsb->nls_io = load_nls(p);
> -			if (!hsb->nls_io) {
> -				pr_err("unable to load iocharset \"%s\"\n", p);
> -				kfree(p);
> +			if (!p)
> 				return 0;
> +			if (strcmp(p, "utf8") != 0) {
> +				hsb->nls_io = load_nls(p);
> +				if (!hsb->nls_io) {
> +					pr_err("unable to load iocharset \"%s\"\n", p);
> +					kfree(p);
> +					return 0;
> +				}
> 			}
> +			have_iocharset = 1;

What’s about true here?

> 			kfree(p);
> 			break;
> 		default:
> @@ -351,7 +361,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> 		}
> 	}
> 
> -	if (hsb->nls_io && !hsb->nls_disk) {
> +	if (have_iocharset && !hsb->nls_disk) {
> 		/*
> 		 * Previous version of hfs driver did something unexpected:
> 		 * When codepage was not defined but iocharset was then
> @@ -382,7 +392,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> 			return 0;
> 		}
> 	}
> -	if (hsb->nls_disk && !hsb->nls_io) {
> +	if (hsb->nls_disk &&
> +	    !have_iocharset && strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0) {

Maybe, introduce the variable to calculate the boolean value here? Then if statement will look much cleaner.

> 		hsb->nls_io = load_nls_default();
> 		if (!hsb->nls_io) {
> 			pr_err("unable to load default iocharset\n");
> diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
> index c75682c61b06..bff8e54003ab 100644
> --- a/fs/hfs/trans.c
> +++ b/fs/hfs/trans.c
> @@ -44,7 +44,7 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
> 		srclen = HFS_NAMELEN;
> 	dst = out;
> 	dstlen = HFS_MAX_NAMELEN;
> -	if (nls_io) {
> +	if (nls_disk) {
> 		wchar_t ch;
> 

I could miss something here. But what’s about the closing “}”?

Thanks,
Slava.

> 		while (srclen > 0) {
> @@ -57,7 +57,12 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
> 			srclen -= size;
> 			if (ch == '/')
> 				ch = ':';
> -			size = nls_io->uni2char(ch, dst, dstlen);
> +			if (nls_io)
> +				size = nls_io->uni2char(ch, dst, dstlen);
> +			else if (dstlen > 0)
> +				size = utf32_to_utf8(ch, dst, dstlen);
> +			else
> +				size = -ENAMETOOLONG;
> 			if (size < 0) {
> 				if (size == -ENAMETOOLONG)
> 					goto out;
> @@ -101,11 +106,22 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
> 	srclen = in->len;
> 	dst = out->name;
> 	dstlen = HFS_NAMELEN;
> -	if (nls_io) {
> +	if (nls_disk) {
> 		wchar_t ch;
> +		unicode_t u;
> 
> 		while (srclen > 0) {
> -			size = nls_io->char2uni(src, srclen, &ch);
> +			if (nls_io)
> +				size = nls_io->char2uni(src, srclen, &ch);
> +			else {
> +				size = utf8_to_utf32(str, strlen, &u);
> +				if (size >= 0) {
> +					if (u <= MAX_WCHAR_T)
> +						ch = u;
> +					else
> +						size = -EINVAL;
> +				}
> +			}
> 			if (size < 0) {
> 				ch = '?';
> 				size = 1;
> -- 
> 2.20.1
>
Pali Rohár Sept. 25, 2022, 12:06 p.m. UTC | #2
Hello! Sorry for a longer delay. Below are comments.

On Monday 09 August 2021 10:49:34 Viacheslav Dubeyko wrote:
> > On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
> > 
> > NLS table for utf8 is broken and cannot be fixed.
> > 
> > So instead of broken utf8 nls functions char2uni() and uni2char() use
> > functions utf8_to_utf32() and utf32_to_utf8() which implements correct
> > encoding and decoding between Unicode code points and UTF-8 sequence.
> > 
> > When iochatset=utf8 is used then set hsb->nls_io to NULL and use it for
> > distinguish between the fact if NLS table or native UTF-8 functions should
> > be used.
> > 
> > Signed-off-by: Pali Rohár <pali@kernel.org>
> > ---
> > fs/hfs/super.c | 33 ++++++++++++++++++++++-----------
> > fs/hfs/trans.c | 24 ++++++++++++++++++++----
> > 2 files changed, 42 insertions(+), 15 deletions(-)
> > 
> > diff --git a/fs/hfs/super.c b/fs/hfs/super.c
> > index 86bc46746c7f..076308df41cf 100644
> > --- a/fs/hfs/super.c
> > +++ b/fs/hfs/super.c
> > @@ -149,10 +149,13 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root)
> > 		seq_printf(seq, ",part=%u", sbi->part);
> > 	if (sbi->session >= 0)
> > 		seq_printf(seq, ",session=%u", sbi->session);
> > -	if (sbi->nls_disk)
> > +	if (sbi->nls_disk) {
> > 		seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset);
> 
> Maybe, I am missing something. But where is the closing “}”?

See below...

> 
> > -	if (sbi->nls_io)
> > -		seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
> > +		if (sbi->nls_io)
> > +			seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
> > +		else
> > +			seq_puts(seq, ",iocharset=utf8");
> > +	}

        ^
... Closing "}" is marked above.

> > 	if (sbi->s_quiet)
> > 		seq_printf(seq, ",quiet");
> > 	return 0;
> > @@ -225,6 +228,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> > 	char *p;
> > 	substring_t args[MAX_OPT_ARGS];
> > 	int tmp, token;
> > +	int have_iocharset;
> 
> What’s about boolean type?

Ok! No problem, I can use "bool" type. Just I was in impression that
code style of this driver is to use "int" type also for booleans.
Same for "false" and "true" as you mentioned below.

> > 
> > 	/* initialize the sb with defaults */
> > 	hsb->s_uid = current_uid();
> > @@ -239,6 +243,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> > 	if (!options)
> > 		return 1;
> > 
> > +	have_iocharset = 0;
> 
> What’s about false here?
> 
> > +
> > 	while ((p = strsep(&options, ",")) != NULL) {
> > 		if (!*p)
> > 			continue;
> > @@ -332,18 +338,22 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> > 			kfree(p);
> > 			break;
> > 		case opt_iocharset:
> > -			if (hsb->nls_io) {
> > +			if (have_iocharset) {
> > 				pr_err("unable to change iocharset\n");
> > 				return 0;
> > 			}
> > 			p = match_strdup(&args[0]);
> > -			if (p)
> > -				hsb->nls_io = load_nls(p);
> > -			if (!hsb->nls_io) {
> > -				pr_err("unable to load iocharset \"%s\"\n", p);
> > -				kfree(p);
> > +			if (!p)
> > 				return 0;
> > +			if (strcmp(p, "utf8") != 0) {
> > +				hsb->nls_io = load_nls(p);
> > +				if (!hsb->nls_io) {
> > +					pr_err("unable to load iocharset \"%s\"\n", p);
> > +					kfree(p);
> > +					return 0;
> > +				}
> > 			}
> > +			have_iocharset = 1;
> 
> What’s about true here?
> 
> > 			kfree(p);
> > 			break;
> > 		default:
> > @@ -351,7 +361,7 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> > 		}
> > 	}
> > 
> > -	if (hsb->nls_io && !hsb->nls_disk) {
> > +	if (have_iocharset && !hsb->nls_disk) {
> > 		/*
> > 		 * Previous version of hfs driver did something unexpected:
> > 		 * When codepage was not defined but iocharset was then
> > @@ -382,7 +392,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> > 			return 0;
> > 		}
> > 	}
> > -	if (hsb->nls_disk && !hsb->nls_io) {
> > +	if (hsb->nls_disk &&
> > +	    !have_iocharset && strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0) {
> 
> Maybe, introduce the variable to calculate the boolean value here? Then if statement will look much cleaner.

I'm not sure how to do it to make code look cleaner.

Currently there is:

if (hsb->nls_disk &&
    !have_iocharset && strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0) {
    hsb->nls_io = load_nls_default();
    ...
}

I can replace it e.g. by:

bool need_to_load_nls;
...
if (hsb->nls_disk &&
    !have_iocharset && strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0)
    need_to_load_nls = true;
else
    need_to_load_nls = false;

if (need_to_load_nls) {
    hsb->nls_io = load_nls_default();
    ...
}

But it is just longer, condition is still there and it requires one
additional variable which more me is less readable because it is longer.

> > 		hsb->nls_io = load_nls_default();
> > 		if (!hsb->nls_io) {
> > 			pr_err("unable to load default iocharset\n");
> > diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
> > index c75682c61b06..bff8e54003ab 100644
> > --- a/fs/hfs/trans.c
> > +++ b/fs/hfs/trans.c
> > @@ -44,7 +44,7 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
> > 		srclen = HFS_NAMELEN;
> > 	dst = out;
> > 	dstlen = HFS_MAX_NAMELEN;
> > -	if (nls_io) {
> > +	if (nls_disk) {
> > 		wchar_t ch;
> > 
> 
> I could miss something here. But what’s about the closing “}”?

Closing "}" is there on the same location as it was. Before my change on
"if" line was opening "{" and also with my change there is opening "{".
So opening "{" and closing "}" are there and matches.

> Thanks,
> Slava.
> 
> > 		while (srclen > 0) {
> > @@ -57,7 +57,12 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
> > 			srclen -= size;
> > 			if (ch == '/')
> > 				ch = ':';
> > -			size = nls_io->uni2char(ch, dst, dstlen);
> > +			if (nls_io)
> > +				size = nls_io->uni2char(ch, dst, dstlen);
> > +			else if (dstlen > 0)
> > +				size = utf32_to_utf8(ch, dst, dstlen);
> > +			else
> > +				size = -ENAMETOOLONG;
> > 			if (size < 0) {
> > 				if (size == -ENAMETOOLONG)
> > 					goto out;
> > @@ -101,11 +106,22 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
> > 	srclen = in->len;
> > 	dst = out->name;
> > 	dstlen = HFS_NAMELEN;
> > -	if (nls_io) {
> > +	if (nls_disk) {
> > 		wchar_t ch;
> > +		unicode_t u;
> > 
> > 		while (srclen > 0) {
> > -			size = nls_io->char2uni(src, srclen, &ch);
> > +			if (nls_io)
> > +				size = nls_io->char2uni(src, srclen, &ch);
> > +			else {
> > +				size = utf8_to_utf32(str, strlen, &u);
> > +				if (size >= 0) {
> > +					if (u <= MAX_WCHAR_T)
> > +						ch = u;
> > +					else
> > +						size = -EINVAL;
> > +				}
> > +			}
> > 			if (size < 0) {
> > 				ch = '?';
> > 				size = 1;
> > -- 
> > 2.20.1
> > 
>
diff mbox series

Patch

diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 86bc46746c7f..076308df41cf 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -149,10 +149,13 @@  static int hfs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",part=%u", sbi->part);
 	if (sbi->session >= 0)
 		seq_printf(seq, ",session=%u", sbi->session);
-	if (sbi->nls_disk)
+	if (sbi->nls_disk) {
 		seq_printf(seq, ",codepage=%s", sbi->nls_disk->charset);
-	if (sbi->nls_io)
-		seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
+		if (sbi->nls_io)
+			seq_printf(seq, ",iocharset=%s", sbi->nls_io->charset);
+		else
+			seq_puts(seq, ",iocharset=utf8");
+	}
 	if (sbi->s_quiet)
 		seq_printf(seq, ",quiet");
 	return 0;
@@ -225,6 +228,7 @@  static int parse_options(char *options, struct hfs_sb_info *hsb)
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int tmp, token;
+	int have_iocharset;
 
 	/* initialize the sb with defaults */
 	hsb->s_uid = current_uid();
@@ -239,6 +243,8 @@  static int parse_options(char *options, struct hfs_sb_info *hsb)
 	if (!options)
 		return 1;
 
+	have_iocharset = 0;
+
 	while ((p = strsep(&options, ",")) != NULL) {
 		if (!*p)
 			continue;
@@ -332,18 +338,22 @@  static int parse_options(char *options, struct hfs_sb_info *hsb)
 			kfree(p);
 			break;
 		case opt_iocharset:
-			if (hsb->nls_io) {
+			if (have_iocharset) {
 				pr_err("unable to change iocharset\n");
 				return 0;
 			}
 			p = match_strdup(&args[0]);
-			if (p)
-				hsb->nls_io = load_nls(p);
-			if (!hsb->nls_io) {
-				pr_err("unable to load iocharset \"%s\"\n", p);
-				kfree(p);
+			if (!p)
 				return 0;
+			if (strcmp(p, "utf8") != 0) {
+				hsb->nls_io = load_nls(p);
+				if (!hsb->nls_io) {
+					pr_err("unable to load iocharset \"%s\"\n", p);
+					kfree(p);
+					return 0;
+				}
 			}
+			have_iocharset = 1;
 			kfree(p);
 			break;
 		default:
@@ -351,7 +361,7 @@  static int parse_options(char *options, struct hfs_sb_info *hsb)
 		}
 	}
 
-	if (hsb->nls_io && !hsb->nls_disk) {
+	if (have_iocharset && !hsb->nls_disk) {
 		/*
 		 * Previous version of hfs driver did something unexpected:
 		 * When codepage was not defined but iocharset was then
@@ -382,7 +392,8 @@  static int parse_options(char *options, struct hfs_sb_info *hsb)
 			return 0;
 		}
 	}
-	if (hsb->nls_disk && !hsb->nls_io) {
+	if (hsb->nls_disk &&
+	    !have_iocharset && strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0) {
 		hsb->nls_io = load_nls_default();
 		if (!hsb->nls_io) {
 			pr_err("unable to load default iocharset\n");
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index c75682c61b06..bff8e54003ab 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -44,7 +44,7 @@  int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
 		srclen = HFS_NAMELEN;
 	dst = out;
 	dstlen = HFS_MAX_NAMELEN;
-	if (nls_io) {
+	if (nls_disk) {
 		wchar_t ch;
 
 		while (srclen > 0) {
@@ -57,7 +57,12 @@  int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
 			srclen -= size;
 			if (ch == '/')
 				ch = ':';
-			size = nls_io->uni2char(ch, dst, dstlen);
+			if (nls_io)
+				size = nls_io->uni2char(ch, dst, dstlen);
+			else if (dstlen > 0)
+				size = utf32_to_utf8(ch, dst, dstlen);
+			else
+				size = -ENAMETOOLONG;
 			if (size < 0) {
 				if (size == -ENAMETOOLONG)
 					goto out;
@@ -101,11 +106,22 @@  void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
 	srclen = in->len;
 	dst = out->name;
 	dstlen = HFS_NAMELEN;
-	if (nls_io) {
+	if (nls_disk) {
 		wchar_t ch;
+		unicode_t u;
 
 		while (srclen > 0) {
-			size = nls_io->char2uni(src, srclen, &ch);
+			if (nls_io)
+				size = nls_io->char2uni(src, srclen, &ch);
+			else {
+				size = utf8_to_utf32(str, strlen, &u);
+				if (size >= 0) {
+					if (u <= MAX_WCHAR_T)
+						ch = u;
+					else
+						size = -EINVAL;
+				}
+			}
 			if (size < 0) {
 				ch = '?';
 				size = 1;