diff mbox

Patch adding e2p_feature_to_string

Message ID CAC80aehVQyWfkd6X339n2bhkDU574hTMWEKXcxzeJrwP4K8E+A@mail.gmail.com
State Accepted, archived
Headers show

Commit Message

👾 Wade Dec. 17, 2014, 7:27 p.m. UTC
This is needed to provide a version of e2p_feature2string that doesn't
use (and return a pointer to) an internal, static buffer.

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Andreas Dilger Dec. 17, 2014, 8:30 p.m. UTC | #1
On Dec 17, 2014, at 12:27 PM, Wade <wadetregaskis@google.com> wrote:
> 
> This is needed to provide a version of e2p_feature2string that doesn't
> use (and return a pointer to) an internal, static buffer.

What's wrong with a pointer to a static string?  Since the strings are
static, none of the callers can change them.  Since they are just pointers,
they can be shared by any number of callers.

More inline below.

> diff --git a/lib/e2p/e2p.h b/lib/e2p/e2p.h
> index 5fa41f4..53f0183 100644
> --- a/lib/e2p/e2p.h
> +++ b/lib/e2p/e2p.h
> @@ -45,6 +45,8 @@ void print_fs_state (FILE * f, unsigned short state);
> int setflags (int fd, unsigned long flags);
> int setversion (int fd, unsigned long version);
> 
> +void e2p_feature_to_string(int compat, unsigned int mask, char *buf,
> +                          size_t buf_len);
> const char *e2p_feature2string(int compat, unsigned int mask);
> const char *e2p_jrnl_feature2string(int compat, unsigned int mask);
> int e2p_string2feature(char *string, int *compat, unsigned int *mask);
> diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
> index 6e53cfe..6dca315 100644
> --- a/lib/e2p/feature.c
> +++ b/lib/e2p/feature.c
> @@ -117,17 +117,20 @@ static struct feature jrnl_feature_list[] = {
>        {       0, 0, 0 },
> };
> 
> -const char *e2p_feature2string(int compat, unsigned int mask)
> +void e2p_feature_to_string(int compat, unsigned int mask, char *buf,
> +                           size_t buf_len)
> {
>        struct feature  *f;
> -       static char buf[20];
>        char    fchar;
>        int     fnum;
> 
>        for (f = feature_list; f->string; f++) {
>                if ((compat == f->compat) &&
> -                   (mask == f->mask))
> -                       return f->string;
> +                   (mask == f->mask)) {
> +                       strncpy(buf, f->string, buf_len);
> +                       buf[buf_len - 1] = 0;
> +                       return;
> +                }
>        }

This adds a strncpy() for every caller, even when it isn't necessary
for callers that only want the existing e2p_feature2string() function.

>        switch (compat) {
>        case  E2P_FEATURE_COMPAT:
> @@ -144,7 +147,13 @@ const char *e2p_feature2string(int compat,
> unsigned int mask)
>                break;
>        }
>        for (fnum = 0; mask >>= 1; fnum++);
> -       sprintf(buf, "FEATURE_%c%d", fchar, fnum);
> +       snprintf(buf, buf_len, "FEATURE_%c%d", fchar, fnum);
> +}
> +
> +const char *e2p_feature2string(int compat, unsigned int mask)
> +{
> +       static char buf[20];
> +       e2p_feature_to_string(compat, mask, buf, sizeof(buf) / sizeof(buf[0]));

This is copying into a single shared buffer for all callers, which is
not thread safe.


A better approach would be to make e2p_feature_to_string() (or maybe
a better name like e2p_feature_string_copy() that distinguishes it
from the existing function) call e2p_feature2string() internally so
the copy is only in the function where it is needed:

char *e2p_feature_string_copy(int compat, unsigned int mask, char *buf,
			      size_t buf_len)
{
        const char *feature = e2p_feature2string(compat, mask);

        strncpy(buf, feature, buf_len);
        buf[buf_len - 1] = '\0';

        return buf;        
}

Note that this version also returns "buf" since this can be convenient
for the caller in some cases, and doesn't hurt.


Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
👾 Wade Dec. 18, 2014, 12:03 a.m. UTC | #2
> What's wrong with a pointer to a static string?  Since the strings are
>
> static, none of the callers can change them.  Since they are just pointers,
> they can be shared by any number of callers.

The existing code doesn't always return a static string - sometimes it
sprintfs one up...

> > +const char *e2p_feature2string(int compat, unsigned int mask)
> > +{
> > +       static char buf[20];
> > +       e2p_feature_to_string(compat, mask, buf, sizeof(buf) / sizeof(buf[0]));
>
> This is copying into a single shared buffer for all callers, which is
> not thread safe.

... exactly.
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Dec. 18, 2014, 5:24 a.m. UTC | #3
On Dec 17, 2014, at 5:03 PM, Wade <wadetregaskis@google.com> wrote:
> 
>> What's wrong with a pointer to a static string?  Since the strings are
>> 
>> static, none of the callers can change them.  Since they are just pointers, they can be shared by any number of callers.
> 
> The existing code doesn't always return a static string - sometimes it
> sprintfs one up...

I see that now.  That said, your version now does this for _every_ caller
of e2p_feature2string(), while the original code only did it in the rare
case of an unsupported feature bit set on on a filesystem.  I don't see
that as an improvement over the original.

One option, maybe not very pretty but not fatally ugly, is to just
initialize all of the "unknown" features as static strings.  That ends
up being 23 extra "FEATURE_Cnn" strings, 21 extra "FEATURE_Rnn" strings,
and 17 extra "FEATURE_Inn" strings.

That ends up being less than 1KB of static text, which isn't bad at all.

This could potentially speed up e2p_feature2string(), since it could
index directly into three different arrays instead of having to scan all
of them to find the right f_compat type first.  I'm not sure if it really
makes a noticeable difference in the end though.

>>> +const char *e2p_feature2string(int compat, unsigned int mask)
>>> +{
>>> +       static char buf[20];
>>> +       e2p_feature_to_string(compat, mask, buf, sizeof(buf) / sizeof(buf[0]));
>> 
>> This is copying into a single shared buffer for all callers, which is
>> not thread safe.
> 
> ... exactly.

So I'm not against your version, but not at the expense of making the
existing code worse.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
👾 Wade Dec. 18, 2014, 6:23 p.m. UTC | #4
> I see that now.  That said, your version now does this for _every_ caller
> of e2p_feature2string(), while the original code only did it in the rare
> case of an unsupported feature bit set on on a filesystem.  I don't see
> that as an improvement over the original.

I do - the existing code is causing data corruption in my use.

I'm unsure why the string copy is problematic - this doesn't seem to
be performance-sensitive code.  As far as I could find, all uses of
this function in the e2fsprogs codebase are just printfing the result,
in human-readable format.  Or for debugging.  ?

> One option, maybe not very pretty but not fatally ugly, is to just
> initialize all of the "unknown" features as static strings.  That ends
> up being 23 extra "FEATURE_Cnn" strings, 21 extra "FEATURE_Rnn" strings,
> and 17 extra "FEATURE_Inn" strings.
>
> That ends up being less than 1KB of static text, which isn't bad at all.
>
> This could potentially speed up e2p_feature2string(), since it could
> index directly into three different arrays instead of having to scan all
> of them to find the right f_compat type first.  I'm not sure if it really
> makes a noticeable difference in the end though.

I can do either, if you insist.  What would you prefer:

a) Statically defining the various FEATURE_foo strings, as you describe.
b) Duplicating the implementation of the two functions so that
e2p_feature2string() can retain its prior behaviour of returning a
string constant in the common cases.

Or alternatively, I suppose:

c) Use a thread-local internal buffer to return the results, so it's
at least thread-safe (though still not really safe, since multiple
calls on one line - e.g. as printf arguments - could still evaluate to
incorrect results).
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andreas Dilger Dec. 18, 2014, 9:58 p.m. UTC | #5
On Dec 18, 2014, at 11:23 AM, Wade <wadetregaskis@google.com> wrote:
>> I see that now.  That said, your version now does this for _every_ caller
>> of e2p_feature2string(), while the original code only did it in the rare
>> case of an unsupported feature bit set on on a filesystem.  I don't see
>> that as an improvement over the original.
> 
> I do - the existing code is causing data corruption in my use.



That you are hitting this problem is interesting in itself, since it
means either that your superblock is corrupt, or you are using some
filesystem features that e2fsprogs doesn't understand, since that is
the only case that a shared buffer is used.  It would make sense in
the latter case to patch lib/e2p/feature.c to add in a proper name
for your feature.

It is really a bad idea to be using a feature bit that isn't at least
_registered_ in e2fsprogs, even if the full support isn't there, since
you run a real risk of someone else using that feature bit for some
completely different feature, and mass problems may result (e.g. e2fsck
incorrectly thinking it understands the feature when it doesn't).

Ted has definitely accepted reservations of feature flags in the past
for features under development or not in the mainline kernel.

> I'm unsure why the string copy is problematic - this doesn't seem to
> be performance-sensitive code.  As far as I could find, all uses of
> this function in the e2fsprogs codebase are just printfing the result,
> in human-readable format.  Or for debugging.  ?

Sorry to not be clear earlier - my objection was that your patch is
making the common use cases (i.e. when only known features are used)
for e2p_feature2string() become non-thread safe and more likely to
hit corruption.  It wasn't about the string copy, nor am I against
fixing this code to be thread safe.  While the uses of this function
in e2fsprogs are all single-threaded, there is clearly external code
that is using this function (yours is proof positive of this), and
we don't want to break those while fixing your uncommon use.

>> One option, maybe not very pretty but not fatally ugly, is to just
>> initialize all of the "unknown" features as static strings.  That ends
>> up being 23 extra "FEATURE_Cnn" strings, 21 extra "FEATURE_Rnn" strings,
>> and 17 extra "FEATURE_Inn" strings.
>> 
>> That ends up being less than 1KB of static text, which isn't bad at all.
>> 
>> This could potentially speed up e2p_feature2string(), since it could
>> index directly into three different arrays instead of having to scan all
>> of them to find the right f_compat type first.  I'm not sure if it really
>> makes a noticeable difference in the end though.
> 
> I can do either, if you insist.  What would you prefer:
> 
> a) Statically defining the various FEATURE_foo strings, as you describe.
> b) Duplicating the implementation of the two functions so that
> e2p_feature2string() can retain its prior behaviour of returning a
> string constant in the common cases.

The (b) case could also extract the common code into helpers like:

static const char *known_feature2string(int compat, unsigned int mask)
{
	struct feature  *f;

	for (f = feature_list; f->string; f++) {
		if (compat == f->compat && mask == f->mask)
			return f->string;
	}

	return NULL;
}

and

static char *unknown_feature2string(int compat, unsigned int mask,
				    char *buf, int buf_len)
{
	char fchar;
	int fnum;

	switch (compat) {
	case E2P_FEATURE_COMPAT:
		fchar = 'C';
		break;
	case E2P_FEATURE_INCOMPAT:
		fchar = 'I';
		break;
        case E2P_FEATURE_RO_INCOMPAT:
		fchar = 'R';
		break;
        default:
		fchar = '?';
		break;
	}

	for (fnum = 0; mask >>= 1; fnum++);
	snprintf(buf, buf_len, "FEATURE_%c%d", fchar, fnum);

	return buf;
}

and these are used by both e2p_feature2string() (using a static buffer
for unknown_feature2string()) and e2p_feature_string_copy() (using the
caller supplied buffer), which avoids almost all of the code duplication.

It is worthwhile to have Ted weigh in on his preference for this.

> Or alternatively, I suppose:
> 
> c) Use a thread-local internal buffer to return the results, so it's
> at least thread-safe (though still not really safe, since multiple
> calls on one line - e.g. as printf arguments - could still evaluate to
> incorrect results).

Probably more complexity than needed, and doesn't really solve the problem.

Cheers, Andreas





--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o Dec. 19, 2014, 3:21 a.m. UTC | #6
On Thu, Dec 18, 2014 at 02:58:38PM -0700, Andreas Dilger wrote:
> 
> It is really a bad idea to be using a feature bit that isn't at least
> _registered_ in e2fsprogs, even if the full support isn't there, since
> you run a real risk of someone else using that feature bit for some
> completely different feature, and mass problems may result (e.g. e2fsck
> incorrectly thinking it understands the feature when it doesn't).

Well, it can happen if you are fuzz-testing.  The other possibility is
that I happen to know that the version of e2fsprogs that Wade was
using is rather old, for hysterical reasons.  (We should fix this, and
in fact there had been some suggestions that we fix this for other
reasons, but we've just not gotten around to it.)

> > I can do either, if you insist.  What would you prefer:
> > 
> > a) Statically defining the various FEATURE_foo strings, as you describe.
> > b) Duplicating the implementation of the two functions so that
> > e2p_feature2string() can retain its prior behaviour of returning a
> > string constant in the common cases.

A third possibiltiy is to change e2p_feature_to_string to only use the
passed-in buffer if it is given an unknown feature, but other times,
it will return a static constant string, just as e2p_feature2string
does today.  That is, change the contract of e2p_feature_to_string
such that the buffer MAY be used by e2p_feature_to_string, instead of
MUST be used....

I believe I've seen other cases where the foo_r() version of a foo()
function works in this way; the passed-in buffer is used if it's
necessary for thread-safety, but it is not guaranteed that the buffer
is used; only that foo_r() is thread-safe.

Cheers,

						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/lib/e2p/e2p.h b/lib/e2p/e2p.h
index 5fa41f4..53f0183 100644
--- a/lib/e2p/e2p.h
+++ b/lib/e2p/e2p.h
@@ -45,6 +45,8 @@  void print_fs_state (FILE * f, unsigned short state);
 int setflags (int fd, unsigned long flags);
 int setversion (int fd, unsigned long version);

+void e2p_feature_to_string(int compat, unsigned int mask, char *buf,
+                          size_t buf_len);
 const char *e2p_feature2string(int compat, unsigned int mask);
 const char *e2p_jrnl_feature2string(int compat, unsigned int mask);
 int e2p_string2feature(char *string, int *compat, unsigned int *mask);
diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
index 6e53cfe..6dca315 100644
--- a/lib/e2p/feature.c
+++ b/lib/e2p/feature.c
@@ -117,17 +117,20 @@  static struct feature jrnl_feature_list[] = {
        {       0, 0, 0 },
 };

-const char *e2p_feature2string(int compat, unsigned int mask)
+void e2p_feature_to_string(int compat, unsigned int mask, char *buf,
+                           size_t buf_len)
 {
        struct feature  *f;
-       static char buf[20];
        char    fchar;
        int     fnum;

        for (f = feature_list; f->string; f++) {
                if ((compat == f->compat) &&
-                   (mask == f->mask))
-                       return f->string;
+                   (mask == f->mask)) {
+                       strncpy(buf, f->string, buf_len);
+                       buf[buf_len - 1] = 0;
+                       return;
+                }
        }
        switch (compat) {
        case  E2P_FEATURE_COMPAT:
@@ -144,7 +147,13 @@  const char *e2p_feature2string(int compat,
unsigned int mask)
                break;
        }
        for (fnum = 0; mask >>= 1; fnum++);
-       sprintf(buf, "FEATURE_%c%d", fchar, fnum);
+       snprintf(buf, buf_len, "FEATURE_%c%d", fchar, fnum);
+}
+
+const char *e2p_feature2string(int compat, unsigned int mask)
+{
+       static char buf[20];
+       e2p_feature_to_string(compat, mask, buf, sizeof(buf) / sizeof(buf[0]));
        return buf;
 }