diff mbox series

[RFC,11/20] hfs: Explicitly set hsb->nls_disk when hsb->nls_io is set

Message ID 20210808162453.1653-12-pali@kernel.org
State New
Headers show
Series fs: Remove usage of broken nls_utf8 and drop it | expand

Commit Message

Pali Rohár Aug. 8, 2021, 4:24 p.m. UTC
It does not make any sense to set hsb->nls_io (NLS iocharset used between
VFS and hfs driver) when hsb->nls_disk (NLS codepage used between hfs
driver and disk) is not set.

Reverse engineering the driver code showed what it does in this special case:

    When codepage was not defined but iocharset was, the
    hfs driver copied each 8-bit character from disk directly to
    the 16-bit Unicode wchar_t type. This means it did a conversion
    from Latin1 (ISO-8859-1) to Unicode, because the first 256
    Unicode code points match the 8-bit ISO-8859-1 codepage table.
    So when iocharset was specified and codepage was not, the
    codepage implicitly used the value "iso8859-1".

So when hsb->nls_disk is not set but hsb->nls_io is, explicitly set
hsb->nls_disk to "iso8859-1".

Such a setup is obviously incompatible with Mac OS systems, as they do not
support iso8859-1 encoding for hfs. So print a warning to dmesg about this
fact.

After this change hsb->nls_disk is always set, so remove the code paths for
the case when hsb->nls_disk was not set, as they are no longer needed.

Signed-off-by: Pali Rohár <pali@kernel.org>
---
 fs/hfs/super.c | 31 +++++++++++++++++++++++++++++++
 fs/hfs/trans.c | 38 ++++++++++++++------------------------
 2 files changed, 45 insertions(+), 24 deletions(-)

Comments

Viacheslav Dubeyko Aug. 9, 2021, 5:31 p.m. UTC | #1
> On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
> 
> It does not make any sense to set hsb->nls_io (NLS iocharset used between
> VFS and hfs driver) when hsb->nls_disk (NLS codepage used between hfs
> driver and disk) is not set.
> 
> Reverse engineering driver code shown what is doing in this special case:
> 
>    When codepage was not defined but iocharset was then
>    hfs driver copied 8bit character from disk directly to
>    16bit unicode wchar_t type. Which means it did conversion
>    from Latin1 (ISO-8859-1) to Unicode because first 256
>    Unicode code points matches 8bit ISO-8859-1 codepage table.
>    So when iocharset was specified and codepage not, then
>    codepage used implicit value "iso8859-1".
> 
> So when hsb->nls_disk is not set and hsb->nls_io is then explicitly set
> hsb->nls_disk to "iso8859-1".
> 
> Such setup is obviously incompatible with Mac OS systems as they do not
> support iso8859-1 encoding for hfs. So print warning into dmesg about this
> fact.
> 
> After this change hsb->nls_disk is always set, so remove code paths for
> case when hsb->nls_disk was not set as they are not needed anymore.
> 


Sounds reasonable. But it would be great to know that the change has been tested reasonably well.

Thanks,
Slava.


> Signed-off-by: Pali Rohár <pali@kernel.org>
> ---
> fs/hfs/super.c | 31 +++++++++++++++++++++++++++++++
> fs/hfs/trans.c | 38 ++++++++++++++------------------------
> 2 files changed, 45 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/hfs/super.c b/fs/hfs/super.c
> index 12d9bae39363..86bc46746c7f 100644
> --- a/fs/hfs/super.c
> +++ b/fs/hfs/super.c
> @@ -351,6 +351,37 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
> 		}
> 	}
> 
> +	if (hsb->nls_io && !hsb->nls_disk) {
> +		/*
> +		 * Previous version of hfs driver did something unexpected:
> +		 * When codepage was not defined but iocharset was then
> +		 * hfs driver copied 8bit character from disk directly to
> +		 * 16bit unicode wchar_t type. Which means it did conversion
> +		 * from Latin1 (ISO-8859-1) to Unicode because first 256
> +		 * Unicode code points matches 8bit ISO-8859-1 codepage table.
> +		 * So when iocharset was specified and codepage not, then
> +		 * codepage used implicit value "iso8859-1".
> +		 *
> +		 * To not change this previous default behavior as some users
> +		 * may depend on it, we load iso8859-1 NLS table explicitly
> +		 * to simplify code and make it more reable what happens.
> +		 *
> +		 * In context of hfs driver it is really strange to use
> +		 * ISO-8859-1 codepage table for storing data to disk, but
> +		 * nothing forbids it. Just it is highly incompatible with
> +		 * Mac OS systems. So via pr_warn() inform user that this
> +		 * is not probably what he wants.
> +		 */
> +		pr_warn("iocharset was specified but codepage not, "
> +			"using default codepage=iso8859-1\n");
> +		pr_warn("this default codepage=iso8859-1 is incompatible with "
> +			"Mac OS systems and may be changed in the future");
> +		hsb->nls_disk = load_nls("iso8859-1");
> +		if (!hsb->nls_disk) {
> +			pr_err("unable to load iso8859-1 codepage\n");
> +			return 0;
> +		}
> +	}
> 	if (hsb->nls_disk && !hsb->nls_io) {
> 		hsb->nls_io = load_nls_default();
> 		if (!hsb->nls_io) {
> diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
> index 39f5e343bf4d..c75682c61b06 100644
> --- a/fs/hfs/trans.c
> +++ b/fs/hfs/trans.c
> @@ -48,18 +48,13 @@ int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
> 		wchar_t ch;
> 
> 		while (srclen > 0) {
> -			if (nls_disk) {
> -				size = nls_disk->char2uni(src, srclen, &ch);
> -				if (size <= 0) {
> -					ch = '?';
> -					size = 1;
> -				}
> -				src += size;
> -				srclen -= size;
> -			} else {
> -				ch = *src++;
> -				srclen--;
> +			size = nls_disk->char2uni(src, srclen, &ch);
> +			if (size <= 0) {
> +				ch = '?';
> +				size = 1;
> 			}
> +			src += size;
> +			srclen -= size;
> 			if (ch == '/')
> 				ch = ':';
> 			size = nls_io->uni2char(ch, dst, dstlen);
> @@ -119,20 +114,15 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
> 			srclen -= size;
> 			if (ch == ':')
> 				ch = '/';
> -			if (nls_disk) {
> -				size = nls_disk->uni2char(ch, dst, dstlen);
> -				if (size < 0) {
> -					if (size == -ENAMETOOLONG)
> -						goto out;
> -					*dst = '?';
> -					size = 1;
> -				}
> -				dst += size;
> -				dstlen -= size;
> -			} else {
> -				*dst++ = ch > 0xff ? '?' : ch;
> -				dstlen--;
> +			size = nls_disk->uni2char(ch, dst, dstlen);
> +			if (size < 0) {
> +				if (size == -ENAMETOOLONG)
> +					goto out;
> +				*dst = '?';
> +				size = 1;
> 			}
> +			dst += size;
> +			dstlen -= size;
> 		}
> 	} else {
> 		char ch;
> -- 
> 2.20.1
>
Matthew Wilcox Aug. 9, 2021, 5:37 p.m. UTC | #2
On Mon, Aug 09, 2021 at 10:31:55AM -0700, Viacheslav Dubeyko wrote:
> > On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
> > 
> > It does not make any sense to set hsb->nls_io (NLS iocharset used between
> > VFS and hfs driver) when hsb->nls_disk (NLS codepage used between hfs
> > driver and disk) is not set.
> > 
> > Reverse engineering driver code shown what is doing in this special case:
> > 
> >    When codepage was not defined but iocharset was then
> >    hfs driver copied 8bit character from disk directly to
> >    16bit unicode wchar_t type. Which means it did conversion
> >    from Latin1 (ISO-8859-1) to Unicode because first 256
> >    Unicode code points matches 8bit ISO-8859-1 codepage table.
> >    So when iocharset was specified and codepage not, then
> >    codepage used implicit value "iso8859-1".
> > 
> > So when hsb->nls_disk is not set and hsb->nls_io is then explicitly set
> > hsb->nls_disk to "iso8859-1".
> > 
> > Such setup is obviously incompatible with Mac OS systems as they do not
> > support iso8859-1 encoding for hfs. So print warning into dmesg about this
> > fact.
> > 
> > After this change hsb->nls_disk is always set, so remove code paths for
> > case when hsb->nls_disk was not set as they are not needed anymore.
> 
> 
> Sounds reasonable. But it will be great to know that the change has been tested reasonably well.

I don't think it's reasonable to ask Pali to test every single filesystem.
That's something the maintainer should do, as you're more likely to have
the infrastructure already set up to do testing of your filesystem and
be aware of fun corner cases and use cases than someone who's working
across all filesystems.
Pali Rohár Aug. 9, 2021, 5:47 p.m. UTC | #3
On Monday 09 August 2021 18:37:19 Matthew Wilcox wrote:
> On Mon, Aug 09, 2021 at 10:31:55AM -0700, Viacheslav Dubeyko wrote:
> > > On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
> > > 
> > > It does not make any sense to set hsb->nls_io (NLS iocharset used between
> > > VFS and hfs driver) when hsb->nls_disk (NLS codepage used between hfs
> > > driver and disk) is not set.
> > > 
> > > Reverse engineering driver code shown what is doing in this special case:
> > > 
> > >    When codepage was not defined but iocharset was then
> > >    hfs driver copied 8bit character from disk directly to
> > >    16bit unicode wchar_t type. Which means it did conversion
> > >    from Latin1 (ISO-8859-1) to Unicode because first 256
> > >    Unicode code points matches 8bit ISO-8859-1 codepage table.
> > >    So when iocharset was specified and codepage not, then
> > >    codepage used implicit value "iso8859-1".
> > > 
> > > So when hsb->nls_disk is not set and hsb->nls_io is then explicitly set
> > > hsb->nls_disk to "iso8859-1".
> > > 
> > > Such setup is obviously incompatible with Mac OS systems as they do not
> > > support iso8859-1 encoding for hfs. So print warning into dmesg about this
> > > fact.
> > > 
> > > After this change hsb->nls_disk is always set, so remove code paths for
> > > case when hsb->nls_disk was not set as they are not needed anymore.
> > 
> > 
> > Sounds reasonable. But it will be great to know that the change has been tested reasonably well.
> 
> I don't think it's reasonable to ask Pali to test every single filesystem.
> That's something the maintainer should do, as you're more likely to have
> the infrastructure already set up to do testing of your filesystem and
> be aware of fun corner cases and use cases than someone who's working
> across all filesystems.

This patch series is currently in RFC form and, as stated in the cover
letter, mostly untested. So the patches are not in a form for merging or
detailed review. I would just like to know if this is the right direction
for filesystems and whether I should continue with this effort or not.
And I thought that sending "incomplete" RFC patches is a better way than
just describing what to do and how...
Viacheslav Dubeyko Aug. 9, 2021, 6 p.m. UTC | #4
> On Aug 9, 2021, at 10:37 AM, Matthew Wilcox <willy@infradead.org> wrote:
> 
> On Mon, Aug 09, 2021 at 10:31:55AM -0700, Viacheslav Dubeyko wrote:
>>> On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
>>> 
>>> It does not make any sense to set hsb->nls_io (NLS iocharset used between
>>> VFS and hfs driver) when hsb->nls_disk (NLS codepage used between hfs
>>> driver and disk) is not set.
>>> 
>>> Reverse engineering driver code shown what is doing in this special case:
>>> 
>>>   When codepage was not defined but iocharset was then
>>>   hfs driver copied 8bit character from disk directly to
>>>   16bit unicode wchar_t type. Which means it did conversion
>>>   from Latin1 (ISO-8859-1) to Unicode because first 256
>>>   Unicode code points matches 8bit ISO-8859-1 codepage table.
>>>   So when iocharset was specified and codepage not, then
>>>   codepage used implicit value "iso8859-1".
>>> 
>>> So when hsb->nls_disk is not set and hsb->nls_io is then explicitly set
>>> hsb->nls_disk to "iso8859-1".
>>> 
>>> Such setup is obviously incompatible with Mac OS systems as they do not
>>> support iso8859-1 encoding for hfs. So print warning into dmesg about this
>>> fact.
>>> 
>>> After this change hsb->nls_disk is always set, so remove code paths for
>>> case when hsb->nls_disk was not set as they are not needed anymore.
>> 
>> 
>> Sounds reasonable. But it will be great to know that the change has been tested reasonably well.
> 
> I don't think it's reasonable to ask Pali to test every single filesystem.
> That's something the maintainer should do, as you're more likely to have
> the infrastructure already set up to do testing of your filesystem and
> be aware of fun corner cases and use cases than someone who's working
> across all filesystems.

I see the point. But the whole approach needs to be tested, at a minimum, on one particular file system. :) And it could be any favorite one.

Thanks,
Slava.
Steve French Aug. 9, 2021, 8:43 p.m. UTC | #5
For cifs.ko, I don't mind running our automated regression tests on
this patch when the patch (or patches) is ready, but was thinking
about an earlier discussion a few months ago about path conversion in
cifs.ko prompted by Al Viro, and whether additional changes should be
made to move the character conversion later as well (e.g. for
characters in the reserved range such as '\' to 0xF026, and ':' to
0xF022 and '>' to 0xF024 and '?' to 0xF025 etc) for the 10 special
characters which have to get remapped into the UCS-2 reserved
character range.

On Mon, Aug 9, 2021 at 12:49 PM Pali Rohár <pali@kernel.org> wrote:
>
> On Monday 09 August 2021 18:37:19 Matthew Wilcox wrote:
> > On Mon, Aug 09, 2021 at 10:31:55AM -0700, Viacheslav Dubeyko wrote:
> > > > On Aug 8, 2021, at 9:24 AM, Pali Rohár <pali@kernel.org> wrote:
> > > >
> > > > It does not make any sense to set hsb->nls_io (NLS iocharset used between
> > > > VFS and hfs driver) when hsb->nls_disk (NLS codepage used between hfs
> > > > driver and disk) is not set.
> > > >
> > > > Reverse engineering driver code shown what is doing in this special case:
> > > >
> > > >    When codepage was not defined but iocharset was then
> > > >    hfs driver copied 8bit character from disk directly to
> > > >    16bit unicode wchar_t type. Which means it did conversion
> > > >    from Latin1 (ISO-8859-1) to Unicode because first 256
> > > >    Unicode code points matches 8bit ISO-8859-1 codepage table.
> > > >    So when iocharset was specified and codepage not, then
> > > >    codepage used implicit value "iso8859-1".
> > > >
> > > > So when hsb->nls_disk is not set and hsb->nls_io is then explicitly set
> > > > hsb->nls_disk to "iso8859-1".
> > > >
> > > > Such setup is obviously incompatible with Mac OS systems as they do not
> > > > support iso8859-1 encoding for hfs. So print warning into dmesg about this
> > > > fact.
> > > >
> > > > After this change hsb->nls_disk is always set, so remove code paths for
> > > > case when hsb->nls_disk was not set as they are not needed anymore.
> > >
> > >
> > > Sounds reasonable. But it will be great to know that the change has been tested reasonably well.
> >
> > I don't think it's reasonable to ask Pali to test every single filesystem.
> > That's something the maintainer should do, as you're more likely to have
> > the infrastructure already set up to do testing of your filesystem and
> > be aware of fun corner cases and use cases than someone who's working
> > across all filesystems.
>
> This patch series is currently in RFC form, as stated in cover letter
> mostly untested. So they are not in form for merging or detailed
> reviewing. I just would like to know if this is the right direction with
> filesystems and if I should continue with this my effort or not.
> And I thought that sending RFC "incomplete" patches is better way than
> just describing what to do and how...
diff mbox series

Patch

diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 12d9bae39363..86bc46746c7f 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -351,6 +351,37 @@  static int parse_options(char *options, struct hfs_sb_info *hsb)
 		}
 	}
 
+	if (hsb->nls_io && !hsb->nls_disk) {
+		/*
+		 * Previous version of hfs driver did something unexpected:
+		 * When codepage was not defined but iocharset was then
+		 * hfs driver copied 8bit character from disk directly to
+		 * 16bit unicode wchar_t type. Which means it did conversion
+		 * from Latin1 (ISO-8859-1) to Unicode because first 256
+		 * Unicode code points matches 8bit ISO-8859-1 codepage table.
+		 * So when iocharset was specified and codepage not, then
+		 * codepage used implicit value "iso8859-1".
+		 *
+		 * To not change this previous default behavior as some users
+		 * may depend on it, we load iso8859-1 NLS table explicitly
+		 * to simplify code and make it more readable what happens.
+		 *
+		 * In context of hfs driver it is really strange to use
+		 * ISO-8859-1 codepage table for storing data to disk, but
+		 * nothing forbids it. Just it is highly incompatible with
+		 * Mac OS systems. So via pr_warn() inform user that this
+		 * is not probably what he wants.
+		 */
+		pr_warn("iocharset was specified but codepage not, "
+			"using default codepage=iso8859-1\n");
+		pr_warn("this default codepage=iso8859-1 is incompatible with "
+			"Mac OS systems and may be changed in the future");
+		hsb->nls_disk = load_nls("iso8859-1");
+		if (!hsb->nls_disk) {
+			pr_err("unable to load iso8859-1 codepage\n");
+			return 0;
+		}
+	}
 	if (hsb->nls_disk && !hsb->nls_io) {
 		hsb->nls_io = load_nls_default();
 		if (!hsb->nls_io) {
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index 39f5e343bf4d..c75682c61b06 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -48,18 +48,13 @@  int hfs_mac2asc(struct super_block *sb, char *out, const struct hfs_name *in)
 		wchar_t ch;
 
 		while (srclen > 0) {
-			if (nls_disk) {
-				size = nls_disk->char2uni(src, srclen, &ch);
-				if (size <= 0) {
-					ch = '?';
-					size = 1;
-				}
-				src += size;
-				srclen -= size;
-			} else {
-				ch = *src++;
-				srclen--;
+			size = nls_disk->char2uni(src, srclen, &ch);
+			if (size <= 0) {
+				ch = '?';
+				size = 1;
 			}
+			src += size;
+			srclen -= size;
 			if (ch == '/')
 				ch = ':';
 			size = nls_io->uni2char(ch, dst, dstlen);
@@ -119,20 +114,15 @@  void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
 			srclen -= size;
 			if (ch == ':')
 				ch = '/';
-			if (nls_disk) {
-				size = nls_disk->uni2char(ch, dst, dstlen);
-				if (size < 0) {
-					if (size == -ENAMETOOLONG)
-						goto out;
-					*dst = '?';
-					size = 1;
-				}
-				dst += size;
-				dstlen -= size;
-			} else {
-				*dst++ = ch > 0xff ? '?' : ch;
-				dstlen--;
+			size = nls_disk->uni2char(ch, dst, dstlen);
+			if (size < 0) {
+				if (size == -ENAMETOOLONG)
+					goto out;
+				*dst = '?';
+				size = 1;
 			}
+			dst += size;
+			dstlen -= size;
 		}
 	} else {
 		char ch;