Message ID | 1459503215-21039-1-git-send-email-matteo@openwrt.org |
---|---|
State | New |
Headers | show |
AFAIK this format is not defined in any standard or specification that GLIBC follows, nor do I think it is worth a GNU extension. There are some problems in adding an intrinsic modifier that is not supported on other platforms: compatibility, the possibility that the modifier could be defined by a future POSIX standard, how to enable it (with _GNU_SOURCE or something), etc. You can implement it as a printf customization [1], which is far from perfect but with some care gets the job done. [1] http://www.gnu.org/software/libc/manual/html_node/Customizing-Printf.html On 01-04-2016 06:33, Matteo Croce wrote: > Since Fibonacci adoption in the 13th century, Arabic numbers have become > the standard numeric system used in every culture. > However, older numeral systems are yet used in many contexts, > for example, ancient Roman numerals are often used to represent > software version, protocol revision or even movie chapters. > > This patch adds the `%r' modifier in all the *printf functions, > which can be used to represent a number in Roman numerals. > This has two big advantages: > > first of all there is no need to hardcode text strings in > the code which needs to be updated on every version change. > e.g. printf("System V booting") or printf("Text Editor for OS X") > can be replaced with: > printf("System %R booting", ver) and printf("Text Editor for OS %R", ver) > and the right version string is generated at runtime. > > The second advantage is that Roman numerals are very lengthy and even a small > 16 bit number can occupy up to 15 bytes, > thus leading to a 650% increase in code size. > > For a technical limitation the maximum number that can be represented in Roman > numerals are limited to 3999, but archaeologist agrees that ancient Romans uses > a superscript to multiply by 1000 and a vertical line to multiply to 1 million. > > To avoid using Unicode characters in such basic IO routines, a pair of _ are > used to represent a thousand value, e.g. 
_XX_ for 20 000 > and a | to represent millions, e.g. |L| for 50 000 000. This increase the > maximum integer that can be represented to 499 999 999 which should be > a reasonable value for our concern, bigger numbers will print as `(infinitum)' > > Another issue is the zero value which was missing in the Roman system, > indeed the zero sign was "imported" by Fibonacci from North Africa, > so when trying to print 0 as Roman numeral, the string `(nihil)', > which is the Latin word for `nothing', will printed like `(null)` for pointers. > > Roman numerals can be generated both in upper and lower case > respectively, with the `%R' and `%r' modifier. > --- > stdio-common/vfprintf.c | 109 +++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 93 insertions(+), 16 deletions(-) > > diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c > index 6829d4d..e85c29c 100644 > --- a/stdio-common/vfprintf.c > +++ b/stdio-common/vfprintf.c > @@ -215,20 +215,20 @@ static const uint8_t jump_table[] = > /* '4' */ 8, /* '5' */ 8, /* '6' */ 8, /* '7' */ 8, > /* '8' */ 8, /* '9' */ 8, 0, 0, > 0, 0, 0, 0, > - 0, /* 'A' */ 26, 0, /* 'C' */ 25, > - 0, /* 'E' */ 19, /* F */ 19, /* 'G' */ 19, > - 0, /* 'I' */ 29, 0, 0, > + 0, /* 'A' */ 27, 0, /* 'C' */ 26, > + 0, /* 'E' */ 20, /* F */ 20, /* 'G' */ 20, > + 0, /* 'I' */ 30, 0, 0, > /* 'L' */ 12, 0, 0, 0, > - 0, 0, 0, /* 'S' */ 21, > + 0, 0, /* 'R' */ 19, /* 'S' */ 22, > 0, 0, 0, 0, > /* 'X' */ 18, 0, /* 'Z' */ 13, 0, > 0, 0, 0, 0, > - 0, /* 'a' */ 26, 0, /* 'c' */ 20, > - /* 'd' */ 15, /* 'e' */ 19, /* 'f' */ 19, /* 'g' */ 19, > - /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 28, 0, > - /* 'l' */ 11, /* 'm' */ 24, /* 'n' */ 23, /* 'o' */ 17, > - /* 'p' */ 22, /* 'q' */ 12, 0, /* 's' */ 21, > - /* 't' */ 27, /* 'u' */ 16, 0, 0, > + 0, /* 'a' */ 27, 0, /* 'c' */ 21, > + /* 'd' */ 15, /* 'e' */ 20, /* 'f' */ 20, /* 'g' */ 20, > + /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 29, 0, > + /* 'l' */ 11, /* 'm' */ 25, /* 'n' */ 24, /* 'o' */ 17, > 
+ /* 'p' */ 23, /* 'q' */ 12, /* 'r' */ 19, /* 's' */ 22, > + /* 't' */ 28, /* 'u' */ 16, 0, 0, > /* 'x' */ 18, 0, /* 'z' */ 13 > }; > > @@ -269,7 +269,7 @@ static const uint8_t jump_table[] = > > #define STEP0_3_TABLE \ > /* Step 0: at the beginning. */ \ > - static JUMP_TABLE_TYPE step0_jumps[30] = \ > + static JUMP_TABLE_TYPE step0_jumps[31] = \ > { \ > REF (form_unknown), \ > REF (flag_space), /* for ' ' */ \ > @@ -290,6 +290,7 @@ static const uint8_t jump_table[] = > REF (form_unsigned), /* for 'u' */ \ > REF (form_octal), /* for 'o' */ \ > REF (form_hexa), /* for 'X', 'x' */ \ > + REF (form_roman), /* for 'R', 'r' */ \ > REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ > REF (form_character), /* for 'c' */ \ > REF (form_string), /* for 's', 'S' */ \ > @@ -303,7 +304,7 @@ static const uint8_t jump_table[] = > REF (flag_i18n), /* for 'I' */ \ > }; \ > /* Step 1: after processing width. */ \ > - static JUMP_TABLE_TYPE step1_jumps[30] = \ > + static JUMP_TABLE_TYPE step1_jumps[31] = \ > { \ > REF (form_unknown), \ > REF (form_unknown), /* for ' ' */ \ > @@ -324,6 +325,7 @@ static const uint8_t jump_table[] = > REF (form_unsigned), /* for 'u' */ \ > REF (form_octal), /* for 'o' */ \ > REF (form_hexa), /* for 'X', 'x' */ \ > + REF (form_roman), /* for 'R', 'r' */ \ > REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ > REF (form_character), /* for 'c' */ \ > REF (form_string), /* for 's', 'S' */ \ > @@ -337,7 +339,7 @@ static const uint8_t jump_table[] = > REF (form_unknown) /* for 'I' */ \ > }; \ > /* Step 2: after processing precision. 
*/ \ > - static JUMP_TABLE_TYPE step2_jumps[30] = \ > + static JUMP_TABLE_TYPE step2_jumps[31] = \ > { \ > REF (form_unknown), \ > REF (form_unknown), /* for ' ' */ \ > @@ -358,6 +360,7 @@ static const uint8_t jump_table[] = > REF (form_unsigned), /* for 'u' */ \ > REF (form_octal), /* for 'o' */ \ > REF (form_hexa), /* for 'X', 'x' */ \ > + REF (form_roman), /* for 'R', 'r' */ \ > REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ > REF (form_character), /* for 'c' */ \ > REF (form_string), /* for 's', 'S' */ \ > @@ -371,7 +374,7 @@ static const uint8_t jump_table[] = > REF (form_unknown) /* for 'I' */ \ > }; \ > /* Step 3a: after processing first 'h' modifier. */ \ > - static JUMP_TABLE_TYPE step3a_jumps[30] = \ > + static JUMP_TABLE_TYPE step3a_jumps[31] = \ > { \ > REF (form_unknown), \ > REF (form_unknown), /* for ' ' */ \ > @@ -392,6 +395,7 @@ static const uint8_t jump_table[] = > REF (form_unsigned), /* for 'u' */ \ > REF (form_octal), /* for 'o' */ \ > REF (form_hexa), /* for 'X', 'x' */ \ > + REF (form_roman), /* for 'R', 'r' */ \ > REF (form_unknown), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ > REF (form_unknown), /* for 'c' */ \ > REF (form_unknown), /* for 's', 'S' */ \ > @@ -405,7 +409,7 @@ static const uint8_t jump_table[] = > REF (form_unknown) /* for 'I' */ \ > }; \ > /* Step 3b: after processing first 'l' modifier. 
*/ \ > - static JUMP_TABLE_TYPE step3b_jumps[30] = \ > + static JUMP_TABLE_TYPE step3b_jumps[31] = \ > { \ > REF (form_unknown), \ > REF (form_unknown), /* for ' ' */ \ > @@ -426,6 +430,7 @@ static const uint8_t jump_table[] = > REF (form_unsigned), /* for 'u' */ \ > REF (form_octal), /* for 'o' */ \ > REF (form_hexa), /* for 'X', 'x' */ \ > + REF (form_roman), /* for 'R', 'r' */ \ > REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ > REF (form_character), /* for 'c' */ \ > REF (form_string), /* for 's', 'S' */ \ > @@ -441,7 +446,7 @@ static const uint8_t jump_table[] = > > #define STEP4_TABLE \ > /* Step 4: processing format specifier. */ \ > - static JUMP_TABLE_TYPE step4_jumps[30] = \ > + static JUMP_TABLE_TYPE step4_jumps[31] = \ > { \ > REF (form_unknown), \ > REF (form_unknown), /* for ' ' */ \ > @@ -462,6 +467,7 @@ static const uint8_t jump_table[] = > REF (form_unsigned), /* for 'u' */ \ > REF (form_octal), /* for 'o' */ \ > REF (form_hexa), /* for 'X', 'x' */ \ > + REF (form_roman), /* for 'R', 'r' */ \ > REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ > REF (form_character), /* for 'c' */ \ > REF (form_string), /* for 's', 'S' */ \ > @@ -741,6 +747,11 @@ static const uint8_t jump_table[] = > break; \ > } \ > \ > + LABEL (form_roman): \ > + /* Ancient Roman / Latin number. */ \ > + roman(s, va_arg (ap, int), spec == L_('R')); \ > + break; \ > + \ > LABEL (form_float): \ > { \ > /* Floating-point number. This is handled by printf_fp.c. 
*/ \ > @@ -1210,6 +1221,72 @@ static const uint8_t jump_table[] = > break; > #endif > > +static const int numbers[] = { 1000, 900, 500, 400, 100, 90, > + 50, 40, 10, 9, 5, 4, 1 }; > +static const CHAR_T *_rul[] = { L_("M"), L_("CM"), L_("D"), L_("CD"), L_("C"), > + L_("XC"), L_("L"), L_("XL"), L_("X"), > + L_("IX"), L_("V"), L_("IV"), L_("I") }; > +static const CHAR_T *_rll[] = { L_("m"), L_("cm"), L_("d"), L_("cd"), L_("c"), > + L_("xc"), L_("l"), L_("xl"), L_("x"), > + L_("ix"), L_("v"), L_("iv"), L_("i") }; > + > +static void roman(FILE *s, long int num, int upper_case) > +{ > + /* used by outchar */ > + int done = 0; > + > + const CHAR_T **letters = upper_case ? _rul : _rll; > + > + if(!num) > + { > + outstring(L_("(nihil)"), 7); > + return; > + } > + > + if(num < 0) > + { > + outchar(L_('-')); > + num = -num; > + } > + > + if(num > 499999999) > + { > + outstring(L_("(infinitum)"), 11); > + return; > + } > + > + if(num > 99999) > + { > + outchar(L_('|')); > + roman(s, num / 100000, upper_case); > + outchar(L_('|')); > + num %= 100000; > + } > + > + if(num > 4999) > + { > + outchar(L_('_')); > + roman(s, num / 1000, upper_case); > + outchar(L_('_')); > + num %= 1000; > + } > + > + for (int i = 0; i < sizeof(numbers) / sizeof(*numbers); i++) > + { > + while (num >= numbers[i]) > + { > + outchar(letters[i][0]); > + if(letters[i][1]) > + outchar(letters[i][1]); > + num -= numbers[i]; > + } > + } > + > + /* suppress warnings */ > + all_done: > + return; > +} > + > /* Helper function to provide temporary buffering for unbuffered streams. */ > static int buffered_vfprintf (FILE *stream, const CHAR_T *fmt, va_list) > __THROW __attribute__ ((noinline)) internal_function; >
On 04/01/2016 05:33 AM, Matteo Croce wrote: > This patch adds the `%r' modifier in all the *printf functions, > which can be used to represent a number in Roman numerals. > This has two big advantages: High-level review: (a) Standardize '%r' and '%R' first. As Adhemerval notes, this needs to go through POSIX or ISO C first to standardize the '%r' and '%R' modifiers. Working with the Austin Group is actually pretty easy; you need to file a ticket with their system and discuss the change to the standard: Austin Group homepage: http://www.opengroup.org/austin/ Austin Group bug tracker: http://austingroupbugs.net/main_page.php (b) Follow the contribution checklist. After you have standardized the modifiers, you go through the contribution checklist here: https://sourceware.org/glibc/wiki/Contribution%20checklist (c) Needs tests. You need tests for all of the characters you are adding, and the output they might generate.
Matteo Croce wrote: > ancient Roman numerals are often used to represent > software version, protocol revision or even movie chapters. That is the best April Fool joke I've seen in many a year! Thanks and congratulations. I had already composed an email that among other things corrected your Latin (it's "nulla", not "nihil"!) before I realized I'd been had.
> This patch adds the `%r' modifier in all the *printf functions, > which can be used to represent a number in Roman numerals. A bit late to the April 1 party, but a C++ Standard Library implementation I worked on years ago actually has this feature. It makes it possible to use the usual C++ iostream inserter and extractor operators to format and parse not only Roman numerals (that was done mostly just to give base 1 a meaning), but more usefully numbers in any base between 1 and 36. The implementation still ships with a number of compilers, though I think only one (Compaq/HP CC for Tru64) has the version with the Roman numerals. Martin
diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c index 6829d4d..e85c29c 100644 --- a/stdio-common/vfprintf.c +++ b/stdio-common/vfprintf.c @@ -215,20 +215,20 @@ static const uint8_t jump_table[] = /* '4' */ 8, /* '5' */ 8, /* '6' */ 8, /* '7' */ 8, /* '8' */ 8, /* '9' */ 8, 0, 0, 0, 0, 0, 0, - 0, /* 'A' */ 26, 0, /* 'C' */ 25, - 0, /* 'E' */ 19, /* F */ 19, /* 'G' */ 19, - 0, /* 'I' */ 29, 0, 0, + 0, /* 'A' */ 27, 0, /* 'C' */ 26, + 0, /* 'E' */ 20, /* F */ 20, /* 'G' */ 20, + 0, /* 'I' */ 30, 0, 0, /* 'L' */ 12, 0, 0, 0, - 0, 0, 0, /* 'S' */ 21, + 0, 0, /* 'R' */ 19, /* 'S' */ 22, 0, 0, 0, 0, /* 'X' */ 18, 0, /* 'Z' */ 13, 0, 0, 0, 0, 0, - 0, /* 'a' */ 26, 0, /* 'c' */ 20, - /* 'd' */ 15, /* 'e' */ 19, /* 'f' */ 19, /* 'g' */ 19, - /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 28, 0, - /* 'l' */ 11, /* 'm' */ 24, /* 'n' */ 23, /* 'o' */ 17, - /* 'p' */ 22, /* 'q' */ 12, 0, /* 's' */ 21, - /* 't' */ 27, /* 'u' */ 16, 0, 0, + 0, /* 'a' */ 27, 0, /* 'c' */ 21, + /* 'd' */ 15, /* 'e' */ 20, /* 'f' */ 20, /* 'g' */ 20, + /* 'h' */ 10, /* 'i' */ 15, /* 'j' */ 29, 0, + /* 'l' */ 11, /* 'm' */ 25, /* 'n' */ 24, /* 'o' */ 17, + /* 'p' */ 23, /* 'q' */ 12, /* 'r' */ 19, /* 's' */ 22, + /* 't' */ 28, /* 'u' */ 16, 0, 0, /* 'x' */ 18, 0, /* 'z' */ 13 }; @@ -269,7 +269,7 @@ static const uint8_t jump_table[] = #define STEP0_3_TABLE \ /* Step 0: at the beginning. 
*/ \ - static JUMP_TABLE_TYPE step0_jumps[30] = \ + static JUMP_TABLE_TYPE step0_jumps[31] = \ { \ REF (form_unknown), \ REF (flag_space), /* for ' ' */ \ @@ -290,6 +290,7 @@ static const uint8_t jump_table[] = REF (form_unsigned), /* for 'u' */ \ REF (form_octal), /* for 'o' */ \ REF (form_hexa), /* for 'X', 'x' */ \ + REF (form_roman), /* for 'R', 'r' */ \ REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ REF (form_character), /* for 'c' */ \ REF (form_string), /* for 's', 'S' */ \ @@ -303,7 +304,7 @@ static const uint8_t jump_table[] = REF (flag_i18n), /* for 'I' */ \ }; \ /* Step 1: after processing width. */ \ - static JUMP_TABLE_TYPE step1_jumps[30] = \ + static JUMP_TABLE_TYPE step1_jumps[31] = \ { \ REF (form_unknown), \ REF (form_unknown), /* for ' ' */ \ @@ -324,6 +325,7 @@ static const uint8_t jump_table[] = REF (form_unsigned), /* for 'u' */ \ REF (form_octal), /* for 'o' */ \ REF (form_hexa), /* for 'X', 'x' */ \ + REF (form_roman), /* for 'R', 'r' */ \ REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ REF (form_character), /* for 'c' */ \ REF (form_string), /* for 's', 'S' */ \ @@ -337,7 +339,7 @@ static const uint8_t jump_table[] = REF (form_unknown) /* for 'I' */ \ }; \ /* Step 2: after processing precision. */ \ - static JUMP_TABLE_TYPE step2_jumps[30] = \ + static JUMP_TABLE_TYPE step2_jumps[31] = \ { \ REF (form_unknown), \ REF (form_unknown), /* for ' ' */ \ @@ -358,6 +360,7 @@ static const uint8_t jump_table[] = REF (form_unsigned), /* for 'u' */ \ REF (form_octal), /* for 'o' */ \ REF (form_hexa), /* for 'X', 'x' */ \ + REF (form_roman), /* for 'R', 'r' */ \ REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ REF (form_character), /* for 'c' */ \ REF (form_string), /* for 's', 'S' */ \ @@ -371,7 +374,7 @@ static const uint8_t jump_table[] = REF (form_unknown) /* for 'I' */ \ }; \ /* Step 3a: after processing first 'h' modifier. 
*/ \ - static JUMP_TABLE_TYPE step3a_jumps[30] = \ + static JUMP_TABLE_TYPE step3a_jumps[31] = \ { \ REF (form_unknown), \ REF (form_unknown), /* for ' ' */ \ @@ -392,6 +395,7 @@ static const uint8_t jump_table[] = REF (form_unsigned), /* for 'u' */ \ REF (form_octal), /* for 'o' */ \ REF (form_hexa), /* for 'X', 'x' */ \ + REF (form_roman), /* for 'R', 'r' */ \ REF (form_unknown), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ REF (form_unknown), /* for 'c' */ \ REF (form_unknown), /* for 's', 'S' */ \ @@ -405,7 +409,7 @@ static const uint8_t jump_table[] = REF (form_unknown) /* for 'I' */ \ }; \ /* Step 3b: after processing first 'l' modifier. */ \ - static JUMP_TABLE_TYPE step3b_jumps[30] = \ + static JUMP_TABLE_TYPE step3b_jumps[31] = \ { \ REF (form_unknown), \ REF (form_unknown), /* for ' ' */ \ @@ -426,6 +430,7 @@ static const uint8_t jump_table[] = REF (form_unsigned), /* for 'u' */ \ REF (form_octal), /* for 'o' */ \ REF (form_hexa), /* for 'X', 'x' */ \ + REF (form_roman), /* for 'R', 'r' */ \ REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ REF (form_character), /* for 'c' */ \ REF (form_string), /* for 's', 'S' */ \ @@ -441,7 +446,7 @@ static const uint8_t jump_table[] = #define STEP4_TABLE \ /* Step 4: processing format specifier. */ \ - static JUMP_TABLE_TYPE step4_jumps[30] = \ + static JUMP_TABLE_TYPE step4_jumps[31] = \ { \ REF (form_unknown), \ REF (form_unknown), /* for ' ' */ \ @@ -462,6 +467,7 @@ static const uint8_t jump_table[] = REF (form_unsigned), /* for 'u' */ \ REF (form_octal), /* for 'o' */ \ REF (form_hexa), /* for 'X', 'x' */ \ + REF (form_roman), /* for 'R', 'r' */ \ REF (form_float), /* for 'E', 'e', 'F', 'f', 'G', 'g' */ \ REF (form_character), /* for 'c' */ \ REF (form_string), /* for 's', 'S' */ \ @@ -741,6 +747,11 @@ static const uint8_t jump_table[] = break; \ } \ \ + LABEL (form_roman): \ + /* Ancient Roman / Latin number. 
*/ \ + roman(s, va_arg (ap, int), spec == L_('R')); \ + break; \ + \ LABEL (form_float): \ { \ /* Floating-point number. This is handled by printf_fp.c. */ \ @@ -1210,6 +1221,72 @@ static const uint8_t jump_table[] = break; #endif +static const int numbers[] = { 1000, 900, 500, 400, 100, 90, + 50, 40, 10, 9, 5, 4, 1 }; +static const CHAR_T *_rul[] = { L_("M"), L_("CM"), L_("D"), L_("CD"), L_("C"), + L_("XC"), L_("L"), L_("XL"), L_("X"), + L_("IX"), L_("V"), L_("IV"), L_("I") }; +static const CHAR_T *_rll[] = { L_("m"), L_("cm"), L_("d"), L_("cd"), L_("c"), + L_("xc"), L_("l"), L_("xl"), L_("x"), + L_("ix"), L_("v"), L_("iv"), L_("i") }; + +static void roman(FILE *s, long int num, int upper_case) +{ + /* used by outchar */ + int done = 0; + + const CHAR_T **letters = upper_case ? _rul : _rll; + + if(!num) + { + outstring(L_("(nihil)"), 7); + return; + } + + if(num < 0) + { + outchar(L_('-')); + num = -num; + } + + if(num > 499999999) + { + outstring(L_("(infinitum)"), 11); + return; + } + + if(num > 99999) + { + outchar(L_('|')); + roman(s, num / 100000, upper_case); + outchar(L_('|')); + num %= 100000; + } + + if(num > 4999) + { + outchar(L_('_')); + roman(s, num / 1000, upper_case); + outchar(L_('_')); + num %= 1000; + } + + for (int i = 0; i < sizeof(numbers) / sizeof(*numbers); i++) + { + while (num >= numbers[i]) + { + outchar(letters[i][0]); + if(letters[i][1]) + outchar(letters[i][1]); + num -= numbers[i]; + } + } + + /* suppress warnings */ + all_done: + return; +} + /* Helper function to provide temporary buffering for unbuffered streams. */ static int buffered_vfprintf (FILE *stream, const CHAR_T *fmt, va_list) __THROW __attribute__ ((noinline)) internal_function;