diff mbox series

[e2fsprogs,4/9] mke2fs: Configure encoding during superblock initialization

Message ID 20181015211220.27370-5-krisman@collabora.co.uk
State Superseded
Headers show
Series Support encoding awareness and casefold | expand

Commit Message

Gabriel Krisman Bertazi Oct. 15, 2018, 9:12 p.m. UTC
This patch implements two new extended options to mkefs, allowing the
user to specify an encoding for file name operations and encoding flags
during filesystem creation.  We provide default flags for each encoding,
which the user can overwrite by passing -E encoding-flags to mkfs.
---
 lib/ext2fs/initialize.c |  4 ++++
 misc/mke2fs.c           | 43 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

Comments

Theodore Ts'o Nov. 21, 2018, 4:55 a.m. UTC | #1
On Mon, Oct 15, 2018 at 05:12:15PM -0400, Gabriel Krisman Bertazi wrote:
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index f05003fc30b9..5ed7b987540e 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -790,6 +790,8 @@ static void parse_extended_opts(struct ext2_super_block *param,
>  	int	len;
>  	int	r_usage = 0;
>  	int	ret;
> +	int	encoding = -1;
> +	char 	*encoding_flags = NULL;

    ...

> +	if (ext2fs_has_feature_fname_encoding(param)) {
> +		param->s_encoding_flags =
> +			ext4_encoding_map[encoding].default_flags;

This code is assuming that users will specify the encoding via "-E encoding=utf8-10.0"
and this will set the FNAME_ENCODING flag implicitly.

But consider what happens if the user runs command like this:

    mke2fs -t ext4 -O fname_encoding -E resize=12T

When parse_extended_opts gets called, the variable encoding will still
be -1, and so we'll end up trying to use a negative array index to
ext4_encoding_map[] which will be... unfortunate.

As I mentioned in another e-mail, I'm a bit dubious about having
per-encoding default flags.  Those flags should either global ext4
code points, or they should be forced to specific values given the
encoding that is specified.
 
We probably also want to have a default encoding if the user just
specifies "-O fname_encoding".   Say, in /etc/mke2fs.conf:

[options]
    default_encoding = utf8-11.0

Then at some point a few years from now, we might enable
fname_encoding by default, so we might have in /etc/mke2fs.conf:

[fs_types]
	ext4 = {
		features = has_journal,extent,huge_file,flex_bg,metadata_csum,64bit,dir_nlink,extra_isize,largedir,fname_encoding
		inode_size = 256
	}

So having a way to specify the default encoding in /etc/mke2fs.conf is
going to be important.  What will probably happen is two years, we'll
be up to Unicode 13.0, and we might want to add support for Unicode
13.0 in some future kernel version,, say, 5.8.  But then we won't want
to make utf8-13.0 the default for some amount of time, since if the
file system is mounted on an older kernel, it won't work; the kernel
will have to reject mounting a file system with an unknown encoding.

So that's why I always like to make these sorts of configuration
defaults to be tuneable in /etc/mke2fs.conf.  Different distros will
have different backwards compatibility policies.  For example, For
enterprise distros, they might want to wait 7 years before creating
file systems with utf8-13.0 as the default.  For a community distro,
they might want to wait 2-3 years.  And for a purpose-built Linux
gaming Valve box, where the kernel is under the control of the box
manufacturers, they might want to be super-aggressive about adopting a
new Unicode encoding, in order to crack that critical Ancient Sanskrit
market.  :-)

						- Ted
Gabriel Krisman Bertazi Nov. 21, 2018, 7:43 p.m. UTC | #2
"Theodore Y. Ts'o" <tytso@mit.edu> writes:

> On Mon, Oct 15, 2018 at 05:12:15PM -0400, Gabriel Krisman Bertazi wrote:
>> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
>> index f05003fc30b9..5ed7b987540e 100644
>> --- a/misc/mke2fs.c
>> +++ b/misc/mke2fs.c
>> @@ -790,6 +790,8 @@ static void parse_extended_opts(struct ext2_super_block *param,
>>  	int	len;
>>  	int	r_usage = 0;
>>  	int	ret;
>> +	int	encoding = -1;
>> +	char 	*encoding_flags = NULL;
>
>     ...
>
>> +	if (ext2fs_has_feature_fname_encoding(param)) {
>> +		param->s_encoding_flags =
>> +			ext4_encoding_map[encoding].default_flags;
>
> This code is assuming that users will specify the encoding via "-E encoding=utf8-10.0"
> and this will set the FNAME_ENCODING flag implicitly.
>
> But consider what happens if the user runs command like this:
>
>     mke2fs -t ext4 -O fname_encoding -E resize=12T
>
> When parse_extended_opts gets called, the variable encoding will still
> be -1, and so we'll end up trying to use a negative array index to
> ext4_encoding_map[] which will be... unfortunate.
>
> As I mentioned in another e-mail, I'm a bit dubious about having
> per-encoding default flags.  Those flags should either global ext4
> code points, or they should be forced to specific values given the
> encoding that is specified.

Normalization and casefold types are too specific to each encoding, to
not be per-encoding.  ASCII has no normalization, for instance.

If I understand you correctly, we should make them ext4 code points to
ensure they don't change in the future.

> We probably also want to have a default encoding if the user just
> specifies "-O fname_encoding".   Say, in /etc/mke2fs.conf:

Right.  That solves the case for -O fname_encoding.  I will do this in v3.

>
> [options]
>     default_encoding = utf8-11.0
>
> Then at some point a few years from now, we might enable
> fname_encoding by default, so we might have in /etc/mke2fs.conf:
> [fs_types]
> 	ext4 = {
> 		features = has_journal,extent,huge_file,flex_bg,metadata_csum,64bit,dir_nlink,extra_isize,largedir,fname_encoding
> 		inode_size = 256
> 	}
>
> So having a way to specify the default encoding in /etc/mke2fs.conf is
> going to be important.  What will probably happen is two years, we'll
> be up to Unicode 13.0, and we might want to add support for Unicode
> 13.0 in some future kernel version,, say, 5.8.  But then we won't want
> to make utf8-13.0 the default for some amount of time, since if the
> file system is mounted on an older kernel, it won't work; the kernel
> will have to reject mounting a file system with an unknown encoding.
>
> So that's why I always like to make these sorts of configuration
> defaults to be tuneable in /etc/mke2fs.conf.  Different distros will
> have different backwards compatibility policies.  For example, For
> enterprise distros, they might want to wait 7 years before creating
> file systems with utf8-13.0 as the default.  For a community distro,
> they might want to wait 2-3 years.  And for a purpose-built Linux
> gaming Valve box, where the kernel is under the control of the box
> manufacturers, they might want to be super-aggressive about adopting a
> new Unicode encoding, in order to crack that critical Ancient Sanskrit
> market.  :-)

good point!
diff mbox series

Patch

diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c
index 8c9e97fee831..30b1ae033340 100644
--- a/lib/ext2fs/initialize.c
+++ b/lib/ext2fs/initialize.c
@@ -186,6 +186,10 @@  errcode_t ext2fs_initialize(const char *name, int flags,
 	set_field(s_flags, 0);
 	assign_field(s_backup_bgs[0]);
 	assign_field(s_backup_bgs[1]);
+
+	assign_field(s_encoding);
+	assign_field(s_encoding_flags);
+
 	if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
 		retval = EXT2_ET_UNSUPP_FEATURE;
 		goto cleanup;
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index f05003fc30b9..5ed7b987540e 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -790,6 +790,8 @@  static void parse_extended_opts(struct ext2_super_block *param,
 	int	len;
 	int	r_usage = 0;
 	int	ret;
+	int	encoding = -1;
+	char 	*encoding_flags = NULL;
 
 	len = strlen(opts);
 	buf = malloc(len+1);
@@ -1056,6 +1058,26 @@  static void parse_extended_opts(struct ext2_super_block *param,
 			}
 		} else if (!strcmp(token, "android_sparse")) {
 			android_sparse_file = 1;
+		} else if (!strcmp(token, "encoding")) {
+			if (!arg) {
+				r_usage++;
+				continue;
+			}
+
+			encoding = e2p_str2encoding(arg);
+			if (encoding < 0) {
+				fprintf(stderr, _("Invalid encoding: %s"), arg);
+				r_usage++;
+				continue;
+			}
+			param->s_encoding = encoding;
+			ext2fs_set_feature_fname_encoding(param);
+		} else if (!strcmp(token, "encoding-flags")) {
+			if (!arg) {
+				r_usage++;
+				continue;
+			}
+			encoding_flags = arg;
 		} else {
 			r_usage++;
 			badopt = token;
@@ -1080,6 +1102,8 @@  static void parse_extended_opts(struct ext2_super_block *param,
 			"\ttest_fs\n"
 			"\tdiscard\n"
 			"\tnodiscard\n"
+			"\tencoding=<encoding>\n"
+			"\tencoding-flags=<flags>\n"
 			"\tquotatype=<quota type(s) to be enabled>\n\n"),
 			badopt ? badopt : "");
 		free(buf);
@@ -1091,6 +1115,24 @@  static void parse_extended_opts(struct ext2_super_block *param,
 				  "multiple of stride %u.\n\n"),
 			param->s_raid_stripe_width, param->s_raid_stride);
 
+	if (ext2fs_has_feature_fname_encoding(param)) {
+		param->s_encoding_flags =
+			ext4_encoding_map[encoding].default_flags;
+		if (encoding_flags &&
+		    e2p_str2encoding_flags(encoding, encoding_flags,
+					   &param->s_encoding_flags)) {
+			fprintf(stderr, _("error: Invalid encoding flag: %s\n"),
+				encoding_flags);
+			free(buf);
+			exit(1);
+		}
+	} else if (encoding_flags) {
+		fprintf(stderr, _("error: An encoding must be explicitely "
+				  "specified when passing encoding-flags\n"));
+		free(buf);
+		exit(1);
+	}
+
 	free(buf);
 }
 
@@ -1112,6 +1154,7 @@  static __u32 ok_features[3] = {
 		EXT4_FEATURE_INCOMPAT_64BIT|
 		EXT4_FEATURE_INCOMPAT_INLINE_DATA|
 		EXT4_FEATURE_INCOMPAT_ENCRYPT |
+		EXT4_FEATURE_INCOMPAT_FNAME_ENCODING |
 		EXT4_FEATURE_INCOMPAT_CSUM_SEED |
 		EXT4_FEATURE_INCOMPAT_LARGEDIR,
 	/* R/O compat */