diff mbox series

[v2] strftime: Allow use of both 'O' and 'E' like "%OEy", "%OEY" [BZ #24453]

Message ID 201904182241.AA04342@tamuki.linet.gr.jp
State New
Headers show
Series [v2] strftime: Allow use of both 'O' and 'E' like "%OEy", "%OEY" [BZ #24453] | expand

Commit Message

TAMUKI Shoichi April 18, 2019, 10:41 p.m. UTC
As a GNU extension, both the 'O' and 'E' modifiers can now be applied
simultaneously like "%OEy" and "%OEY".  The full representation of the
alternative calendar year with alternative numeric symbols (%OEY)
typically includes an internal use of "%Ey".  The 'O' modifier given
in "%OEY" is applied to that internal "%Ey", allowing users of "%OEY"
to control how the year number is represented.

This change applies to all locales.  For locales that use era and
alt_digits, such as ja_JP and lzh_TW, this feature should be useful.

The variable "modifier" in __strftime_internal function now represents
four states: 'E', 'O', "E with O", or 0 (neither 'E' nor 'O').
Furthermore, the newly added MODIFIER_E and MODIFIER_O macros help to
reduce conditional statement changes using "modifier" variable.

Tested on x86_64.

ChangeLog:

	[BZ #24453]
	* manual/time.texi (strftime): Document "%OEy" and "%OEY".
	* time/strftime_l.c (__strftime_internal): Allow both the 'O' and 'E'
	modifiers to be applied simultaneously to conversion specifications.
	(MODIFIER_E, MODIFIER_O): New macros.
	If a modifier ('O') is specified to "%EY", interpret the "%Ey" in the
	subformat as if decorated with that modifier.
	* time/tst-strftime2.c (formats): Add "%OEY".
	(mkreftable): Add new variables "alt_yr", "alt_yrj", and "alt_yrc" to
	represent the Japanese era and R.O.C. calendar years in Chinese
	numerals to be checked.
---
 Changes since v1:
 - support just %OE*, without repetition
 - slightly modify the documentation including commit message

 NEWS                 |  6 ++++
 manual/time.texi     | 16 ++++++++++
 time/strftime_l.c    | 86 ++++++++++++++++++++++++++++++----------------------
 time/tst-strftime2.c | 39 +++++++++++++++++++-----
 4 files changed, 104 insertions(+), 43 deletions(-)
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index b58e2469d4..236527f9ee 100644
--- a/NEWS
+++ b/NEWS
@@ -24,6 +24,12 @@  Major new features:
 
 * The entry for the new Japanese era has been added for ja_JP locale.
 
+* As a GNU extension, both the 'O' and 'E' modifiers can now be
+  applied simultaneously to "%y" (%OEy) to produce the locale's
+  alternative year with alternative numeric symbols.  Also, both the
+  'O' and 'E' modifiers can now be applied simultaneously to "%Y"
+  (%OEY); they have the same effect that they would on "%OEy".
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * The functions clock_gettime, clock_getres, clock_settime,
diff --git a/manual/time.texi b/manual/time.texi
index bfa46dd45b..116a8395dc 100644
--- a/manual/time.texi
+++ b/manual/time.texi
@@ -1579,6 +1579,14 @@  However, by default it is zero-padded to a minimum of two digits (this
 can be overridden by an explicit field width or by the @code{_} and
 @code{-} flags).
 
+As a GNU extension, if both the @code{O} and @code{E} modifiers are
+specified (@code{%OEy}), this instead produces the year number
+according to the locale-specific alternative calendar, using the
+locale's alternative numeric symbols.  Because the alternative numeric
+symbols are generally defined only from 0 to 99, a calendar year
+number greater than 99 is produced as an ordinary number as a
+fallback.  The actual limit may be locale dependent.
+
 @item %Y
 The year as a decimal number, using the Gregorian calendar.  Years
 before the year @code{1} are numbered @code{0}, @code{-1}, and so on.
@@ -1590,6 +1598,14 @@  information produced by @code{%EC} and @code{%Ey}.  As a GNU
 extension, the formatting flags @code{_} or @code{-} may be used with
 this conversion specifier; they affect how the year number is printed.
 
+As a GNU extension, if both the @code{O} and @code{E} modifiers are
+specified (@code{%OEY}), this instead produces a complete
+representation of the year according to the locale's alternative
+calendar, using the locale's alternative numeric symbols.  Because the
+alternative numeric symbols are generally defined only from 0 to 99, a
+calendar year number greater than 99 is produced as an ordinary number
+as a fallback.  The actual limit may be locale dependent.
+
 @item %z
 @w{RFC 822}/@w{ISO 8601:1988} style numeric time zone (e.g.,
 @code{-0600} or @code{+0100}), or nothing if no time zone is
diff --git a/time/strftime_l.c b/time/strftime_l.c
index 98c35d58a2..ec2ed6935c 100644
--- a/time/strftime_l.c
+++ b/time/strftime_l.c
@@ -713,14 +713,21 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
       switch (*f)
 	{
 	case L_('E'):
-	case L_('O'):
 	  modifier = *f++;
 	  break;
 
+	case L_('O'):
+	  modifier = *f++ << 8;
+	  if (*f == L_('E'))
+	    modifier |= *f++;
+	  break;
+
 	default:
 	  modifier = 0;
 	  break;
 	}
+#define MODIFIER_E (modifier & 0xff)
+#define MODIFIER_O (modifier >> 8 & 0xff)
 
       /* Now do the specified format.  */
       format_char = *f;
@@ -786,10 +793,10 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	      to_uppcase = 1;
 	      to_lowcase = 0;
 	    }
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 #if defined _NL_CURRENT || !HAVE_STRFTIME
-	  if (modifier == L_('O'))
+	  if (MODIFIER_O == L_('O'))
 	    cpy (aam_len, a_altmonth);
 	  else
 	    cpy (am_len, a_month);
@@ -799,7 +806,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 #endif
 
 	case L_('B'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 	  if (change_case)
 	    {
@@ -807,7 +814,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	      to_lowcase = 0;
 	    }
 #if defined _NL_CURRENT || !HAVE_STRFTIME
-	  if (modifier == L_('O'))
+	  if (MODIFIER_O == L_('O'))
 	    cpy (STRLEN (f_altmonth), f_altmonth);
 	  else
 	    cpy (STRLEN (f_month), f_month);
@@ -817,10 +824,10 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 #endif
 
 	case L_('c'):
-	  if (modifier == L_('O'))
+	  if (MODIFIER_O == L_('O'))
 	    goto bad_format;
 #ifdef _NL_CURRENT
-	  if (! (modifier == L_('E')
+	  if (! (MODIFIER_E == L_('E')
 		 && (*(subfmt =
 		       (const CHAR_T *) _NL_CURRENT (LC_TIME,
 						     NLW(ERA_D_T_FMT)))
@@ -858,7 +865,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  {
 	    /* The relevant information is available only via the
 	       underlying strftime implementation, so use that.  */
-	    char ufmt[4];
+	    char ufmt[5];
 	    char *u = ufmt;
 	    char ubuf[1024]; /* enough for any single format in practice */
 	    size_t len;
@@ -871,8 +878,10 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 # endif
 
 	    *u++ = '%';
-	    if (modifier != 0)
-	      *u++ = modifier;
+	    if (MODIFIER_O == L_('O'))
+	      *u++ = 'O';
+	    if (MODIFIER_E == L_('E'))
+	      *u++ = 'E';
 	    *u++ = format_char;
 	    *u = '\0';
 	    len = strftime (ubuf, sizeof ubuf, ufmt, tp);
@@ -884,7 +893,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 #endif
 
 	case L_('C'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    {
 #if HAVE_STRUCT_ERA_ENTRY
 	      struct era_entry *era = _nl_get_era_entry (tp HELPER_LOCALE_ARG);
@@ -912,10 +921,10 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  }
 
 	case L_('x'):
-	  if (modifier == L_('O'))
+	  if (MODIFIER_O == L_('O'))
 	    goto bad_format;
 #ifdef _NL_CURRENT
-	  if (! (modifier == L_('E')
+	  if (! (MODIFIER_E == L_('E')
 		 && (*(subfmt =
 		       (const CHAR_T *) _NL_CURRENT (LC_TIME, NLW(ERA_D_FMT)))
 		     != L_('\0'))))
@@ -935,13 +944,13 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  goto subformat;
 
 	case L_('d'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, tp->tm_mday);
 
 	case L_('e'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER_SPACEPAD (2, tp->tm_mday);
@@ -957,7 +966,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	do_number:
 	  /* Format the number according to the MODIFIER flag.  */
 
-	  if (modifier == L_('O') && 0 <= number_value)
+	  if (MODIFIER_O == L_('O') && 0 <= number_value)
 	    {
 #ifdef _NL_CURRENT
 	      /* Get the locale specific alternate representation of
@@ -1047,43 +1056,43 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  goto subformat;
 
 	case L_('H'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, tp->tm_hour);
 
 	case L_('I'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, hour12);
 
 	case L_('k'):		/* GNU extension.  */
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER_SPACEPAD (2, tp->tm_hour);
 
 	case L_('l'):		/* GNU extension.  */
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER_SPACEPAD (2, hour12);
 
 	case L_('j'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (3, 1 + tp->tm_yday);
 
 	case L_('M'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, tp->tm_min);
 
 	case L_('m'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, tp->tm_mon + 1);
@@ -1130,7 +1139,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 #endif
 
 	case L_('S'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, tp->tm_sec);
@@ -1175,10 +1184,10 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  }
 
 	case L_('X'):
-	  if (modifier == L_('O'))
+	  if (MODIFIER_O == L_('O'))
 	    goto bad_format;
 #ifdef _NL_CURRENT
-	  if (! (modifier == L_('E')
+	  if (! (MODIFIER_E == L_('E')
 		 && (*(subfmt =
 		       (const CHAR_T *) _NL_CURRENT (LC_TIME, NLW(ERA_T_FMT)))
 		     != L_('\0'))))
@@ -1203,7 +1212,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  DO_NUMBER (1, (tp->tm_wday - 1 + 7) % 7 + 1);
 
 	case L_('U'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, (tp->tm_yday - tp->tm_wday + 7) / 7);
@@ -1211,7 +1220,7 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	case L_('V'):
 	case L_('g'):
 	case L_('G'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 	  {
 	    int year = tp->tm_year + TM_YEAR_BASE;
@@ -1250,19 +1259,19 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	  }
 
 	case L_('W'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (2, (tp->tm_yday - (tp->tm_wday - 1 + 7) % 7 + 7) / 7);
 
 	case L_('w'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    goto bad_format;
 
 	  DO_NUMBER (1, tp->tm_wday);
 
 	case L_('Y'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    {
 #if HAVE_STRUCT_ERA_ENTRY
 	      struct era_entry *era = _nl_get_era_entry (tp HELPER_LOCALE_ARG);
@@ -1273,8 +1282,8 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 # else
 		  subfmt = era->era_format;
 # endif
-		  if (pad != 0)
-		    yr_spec = pad;
+		  if (pad != 0 || MODIFIER_O == L_('O'))
+		    yr_spec = (MODIFIER_O == L_('O')) ? L_('O') : pad;
 		  goto subformat;
 		}
 #else
@@ -1283,13 +1292,13 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 # endif
 #endif
 	    }
-	  if (modifier == L_('O'))
+	  if (MODIFIER_O == L_('O'))
 	    goto bad_format;
 	  else
 	    DO_NUMBER (1, tp->tm_year + TM_YEAR_BASE);
 
 	case L_('y'):
-	  if (modifier == L_('E'))
+	  if (MODIFIER_E == L_('E'))
 	    {
 #if HAVE_STRUCT_ERA_ENTRY
 	      struct era_entry *era = _nl_get_era_entry (tp HELPER_LOCALE_ARG);
@@ -1297,7 +1306,12 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 		{
 		  int delta = tp->tm_year - era->start_date[0];
 		  if (yr_spec != 0)
-		    pad = yr_spec;
+		    {
+		      if (yr_spec == L_('O'))
+			modifier |= yr_spec << 8;
+		      else
+			pad = yr_spec;
+		    }
 		  DO_NUMBER (2, (era->offset
 				 + delta * era->absolute_direction));
 		}
diff --git a/time/tst-strftime2.c b/time/tst-strftime2.c
index 18dbf1b32f..ce3b50e320 100644
--- a/time/tst-strftime2.c
+++ b/time/tst-strftime2.c
@@ -41,7 +41,7 @@  enum
   zh_TW, cmn_TW, hak_TW, nan_TW, lzh_TW
 };
 
-static const char *formats[] = { "%EY", "%_EY", "%-EY" };
+static const char *formats[] = { "%EY", "%_EY", "%-EY", "%OEY" };
 
 typedef struct
 {
@@ -88,13 +88,23 @@  static void
 mkreftable (void)
 {
   int i, j, k, yr;
-  const char *era, *sfx;
+  const char *era, *alt_yr, *sfx;
   /* Japanese era year to be checked.  */
   static const int yrj[] =
   {
     43, 44, 45, 2,
     63, 64, 1, 2, 9, 10, 22, 23, 31, 1
   };
+  /* Japanese era year in Chinese numeral to be checked.  */
+  static const char *alt_yrj[] =
+  {
+    "\u56db\u5341\u4e09", "\u56db\u5341\u56db",
+    "\u56db\u5341\u4e94", "\u4e8c",
+    "\u516d\u5341\u4e09", "\u516d\u5341\u56db",
+    "", "\u4e8c", "\u4e5d", "\u5341",
+    "\u4e8c\u5341\u4e8c", "\u4e8c\u5341\u4e09",
+    "\u4e09\u5341\u4e00", ""
+  };
   /* Buddhist calendar year to be checked.  */
   static const int yrb[] =
   {
@@ -108,6 +118,14 @@  mkreftable (void)
     -2, -1, 1, 2,
     77, 78, 78, 79, 86, 87, 99, 100, 108, 108
   };
+  /* R.O.C. calendar year in Chinese numeral to be checked.  */
+  static const char *alt_yrc[] =
+  {
+    "\u4e8c", "\u4e00", "", "\u4e8c",
+    "\u4e03\u5341\u4e03", "\u4e03\u5341\u516b", "\u4e03\u5341\u516b",
+    "\u4e03\u5341\u4e5d", "\u516b\u5341\u516d", "\u516b\u5341\u4e03",
+    "\u4e5d\u5341\u4e5d", "", "", ""
+  };
 
   for (i = 0; i < array_length (locales); i++)
     for (j = 0; j < array_length (formats); j++)
@@ -120,29 +138,36 @@  mkreftable (void)
 		  : (is_before (k,  8,  1, 1989)) ? "\u662d\u548c"
 		  : (is_before (k,  1,  5, 2019)) ? "\u5e73\u6210"
 						  : "\u4ee4\u548c";
-	      yr = yrj[k], sfx = "\u5e74";
+	      yr = yrj[k], alt_yr = alt_yrj[k], sfx = "\u5e74";
 	    }
 	  else if (i == lo_LA)
-	    era = "\u0e9e.\u0eaa. ", yr = yrb[k], sfx = "";
+	    era = "\u0e9e.\u0eaa. ", yr = yrb[k], alt_yr = sfx = "";
 	  else if (i == th_TH)
-	    era = "\u0e1e.\u0e28. ", yr = yrb[k], sfx = "";
+	    era = "\u0e1e.\u0e28. ", yr = yrb[k], alt_yr = sfx = "";
 	  else if (i == zh_TW || i == cmn_TW || i == hak_TW
 		   || i == nan_TW || i == lzh_TW)
 	    {
 	      era = (is_before (k, 1, 1, 1912)) ? "\u6c11\u524d"
 						: "\u6c11\u570b";
-	      yr = yrc[k], sfx = "\u5e74";
+	      yr = yrc[k], alt_yr = alt_yrc[k], sfx = "\u5e74";
 	    }
 	  else
 	    FAIL_EXIT1 ("Invalid table index!");
 	  if (yr == 1)
 	    sprintf (ref[i][j][k], "%s\u5143%s", era, sfx);
-	  else if (j == 0)
+	  else if (j == 0 || (j == 3
+		   && ((i != ja_JP && i != lzh_TW) || abs (yr) > 99
+		   /* Currently, alt_digits in lzh_TW are defined up
+		      to 31, so generating Chinese numerals is
+		      temporarily limited to this range.  */
+		   || (i == lzh_TW && abs (yr) > 31))))
 	    sprintf (ref[i][j][k], "%s%02d%s", era, abs (yr), sfx);
 	  else if (j == 1)
 	    sprintf (ref[i][j][k], "%s%2d%s", era, abs (yr), sfx);
 	  else if (j == 2)
 	    sprintf (ref[i][j][k], "%s%d%s", era, abs (yr), sfx);
+	  else if (j == 3)
+	    sprintf (ref[i][j][k], "%s%s%s", era, alt_yr, sfx);
 	  else
 	    FAIL_EXIT1 ("Invalid table index!");
 	}