[v2,7/14,BZ,#14095] update collation data from Unicode / ISO 14651

Message ID s9dtvuva068.fsf@taka.site
State New
Headers show
Series
  • [v2,1/14,BZ,#14095] update collation data from Unicode / ISO 14651
Related show

Commit Message

Mike FABIAN Feb. 5, 2018, 4:09 p.m.

Patch

From 9212043a974e26eab2654aa19603a5fc73911936 Mon Sep 17 00:00:00 2001
From: Mike FABIAN <mfabian@redhat.com>
Date: Tue, 30 Jan 2018 15:45:05 +0100
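Subject: [PATCH 07/14] Add sections for various scripts to the
 iso14651_t1_common file

Declare 21 script symbols and replace the conditional order_start that
followed <SFFFF> with one order_start/order_end section per declared
script.  The DIACRIT_BACKWARD conditional now selects the second-level
direction only for the <LATIN> section; the <SPECIAL> section always
uses forward;backward;forward;forward,position and every other section
uses forward;forward;forward;forward,position.
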
---
 localedata/locales/iso14651_t1_common | 77 +++++++++++++++++++++++++++++++----
 1 file changed, 68 insertions(+), 9 deletions(-)

diff --git a/localedata/locales/iso14651_t1_common b/localedata/locales/iso14651_t1_common
index 2d5fdfa87b..a049c2fec5 100644
--- a/localedata/locales/iso14651_t1_common
+++ b/localedata/locales/iso14651_t1_common
@@ -18,6 +18,29 @@  LC_COLLATE
 % Autogenerated Common Template Table
 %   created from unidata-9.0.0.txt
 
+% Declaration of scripts
+script <SPECIAL>
+script <LATIN>
+script <TIFINAGH>
+script <ARAB>
+script <HEBREU>
+script <GREC>
+script <CYRIL>
+script <ARMENIAN>
+script <GEORGIAN>
+script <DEVANAGARI>
+script <GUJARATI>
+script <TELUGU>
+script <GURUMUKHI>
+script <KANNADA>
+script <TAMIL>
+script <SINHALA>
+script <MALAYALAM>
+script <BENGALI>
+script <MYANMAR>
+script <TIBETAN>
+script <Ethi>
+
 % Declaration of collating symbols
 
 % Many symbols (such as <S0060>) are declared and assigned a weight
@@ -54801,15 +54824,7 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 
 <SFFFF> % Largest primary weight
 
-ifdef DIACRIT_BACKWARD
-order_start forward;backward;forward;forward,position
-else
-order_start forward;forward;forward;forward,position
-endif
-
-% Decomment the first order_start line to specify directions for each level.
-%   To tailor for French accent handling, instead decomment the second
-%   order_start statement.
+order_start <SPECIAL>;forward;backward;forward;forward,position
 
 % Note: The following list of symbol_element's has been generated in
 %   sorted order, to assist in understanding the string ordering that
@@ -64305,6 +64320,12 @@  endif
 <U33E8> "<S0039><RFB40><TE5E5>";"<BASE><BASE>";"<COMPAT><COMPAT>";<U33E8> % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY NINE
 <U32C8> "<S0039><RFB40><TE708>";"<BASE><BASE>";"<COMPAT><COMPAT>";<U32C8> % IDEOGRAPHIC TELEGRAPH SYMBOL FOR SEPTEMBER
 <U3361> "<S0039><RFB40><TF0B9>";"<BASE><BASE>";"<COMPAT><COMPAT>";<U3361> % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR NINE
+order_end
+ifdef DIACRIT_BACKWARD
+order_start <LATIN>;forward;backward;forward;forward,position
+else
+order_start <LATIN>;forward;forward;forward;forward,position
+endif
 <U0061> <S0061>;<BASE>;<MIN>;<U0061> % LATIN SMALL LETTER A
 <UFF41> <S0061>;<BASE>;<WIDE>;<UFF41> % FULLWIDTH LATIN SMALL LETTER A
 <U0363> <S0061>;<BASE>;<COMPAT>;<U0363> % COMBINING LATIN SMALL LETTER A
@@ -66739,6 +66760,8 @@  endif
 <U0001D736> <S03B1>;<BASE>;<FONT>;<U0001D736> % MATHEMATICAL BOLD ITALIC SMALL ALPHA
 <U0001D770> <S03B1>;<BASE>;<FONT>;<U0001D770> % MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA
 <U0001D7AA> <S03B1>;<BASE>;<FONT>;<U0001D7AA> % MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA
+order_end
+order_start <GREC>;forward;forward;forward;forward,position
 <U0391> <S03B1>;<BASE>;<CAP>;<U0391> % GREEK CAPITAL LETTER ALPHA
 <U0001D6A8> <S03B1>;<BASE>;<FONTCAP>;<U0001D6A8> % MATHEMATICAL BOLD CAPITAL ALPHA
 <U0001D6E2> <S03B1>;<BASE>;<FONTCAP>;<U0001D6E2> % MATHEMATICAL ITALIC CAPITAL ALPHA
@@ -67499,6 +67522,8 @@  endif
 <U2CE0> <S2CE1>;<BASE>;<CAP>;<U2CE0> % COPTIC CAPITAL LETTER OLD NUBIAN NYI
 <U2CE3> <S2CE3>;<BASE>;<MIN>;<U2CE3> % COPTIC SMALL LETTER OLD NUBIAN WAU
 <U2CE2> <S2CE3>;<BASE>;<CAP>;<U2CE2> % COPTIC CAPITAL LETTER OLD NUBIAN WAU
+order_end
+order_start <CYRIL>;forward;forward;forward;forward,position
 <U0430> <S0430>;<BASE>;<MIN>;<U0430> % CYRILLIC SMALL LETTER A
 <U2DF6> <S0430>;<BASE>;<COMPAT>;<U2DF6> % COMBINING CYRILLIC LETTER A
 <U0410> <S0430>;<BASE>;<CAP>;<U0410> % CYRILLIC CAPITAL LETTER A
@@ -68104,6 +68129,8 @@  endif
 <U00010373> <S10373>;<BASE>;<MIN>;<U00010373> % OLD PERMIC LETTER YU
 <U00010374> <S10374>;<BASE>;<MIN>;<U00010374> % OLD PERMIC LETTER YA
 <U00010375> <S10375>;<BASE>;<MIN>;<U00010375> % OLD PERMIC LETTER IA
+order_end
+order_start <GEORGIAN>;forward;forward;forward;forward,position
 <U10D0> <S10D0>;<BASE>;<MIN>;<U10D0> % GEORGIAN LETTER AN
 <U2D00> <S2D00>;<BASE>;<MIN>;<U2D00> % GEORGIAN SMALL LETTER AN
 <U10A0> <S2D00>;<BASE>;<CAP>;<U10A0> % GEORGIAN CAPITAL LETTER AN
@@ -68231,6 +68258,8 @@  endif
 <U10CD> <S2D2D>;<BASE>;<CAP>;<U10CD> % GEORGIAN CAPITAL LETTER AEN
 <U10FE> <S10FE>;<BASE>;<MIN>;<U10FE> % GEORGIAN LETTER HARD SIGN
 <U10FF> <S10FF>;<BASE>;<MIN>;<U10FF> % GEORGIAN LETTER LABIAL SIGN
+order_end
+order_start <ARMENIAN>;forward;forward;forward;forward,position
 <U0561> <S0561>;<BASE>;<MIN>;<U0561> % ARMENIAN SMALL LETTER AYB
 <U0531> <S0561>;<BASE>;<CAP>;<U0531> % ARMENIAN CAPITAL LETTER AYB
 <U0562> <S0562>;<BASE>;<MIN>;<U0562> % ARMENIAN SMALL LETTER BEN
@@ -68314,6 +68343,8 @@  endif
 <U0586> <S0586>;<BASE>;<MIN>;<U0586> % ARMENIAN SMALL LETTER FEH
 <U0556> <S0586>;<BASE>;<CAP>;<U0556> % ARMENIAN CAPITAL LETTER FEH
 <U0559> <S0559>;<BASE>;<MIN>;<U0559> % ARMENIAN MODIFIER LETTER LEFT HALF RING
+order_end
+order_start <HEBREU>;forward;forward;forward;forward,position
 <U05D0> <S05D0>;<BASE>;<MIN>;<U05D0> % HEBREW LETTER ALEF
 <U2135> <S05D0>;<BASE>;<COMPAT>;<U2135> % ALEF SYMBOL
 <UFB21> <S05D0>;<BASE>;<FONT>;<UFB21> % HEBREW LETTER WIDE ALEF
@@ -68440,6 +68471,8 @@  endif
 <U0817> <S0817>;<BASE>;<MIN>;<U0817> % SAMARITAN MARK IN-ALAF
 <U081A> <S081A>;<BASE>;<MIN>;<U081A> % SAMARITAN MODIFIER LETTER EPENTHETIC YUT
 <U081B> <S081B>;<BASE>;<MIN>;<U081B> % SAMARITAN MARK EPENTHETIC YUT
+order_end
+order_start <ARAB>;forward;forward;forward;forward,position
 <U0621> <S0621>;<BASE>;<MIN>;<U0621> % ARABIC LETTER HAMZA
 <U0674> <S0621>;<BASE>;<COMPAT>;<U0674> % ARABIC LETTER HIGH HAMZA
 <UFE80> <S0621>;<BASE>;<AISO>;<UFE80> % ARABIC LETTER HAMZA ISOLATED FORM
@@ -69669,6 +69702,8 @@  endif
 <U07E7> <S07E7>;<BASE>;<MIN>;<U07E7> % NKO LETTER NYA WOLOSO
 <U07F4> <S07F4>;<BASE>;<MIN>;<U07F4> % NKO HIGH TONE APOSTROPHE
 <U07F5> <S07F5>;<BASE>;<MIN>;<U07F5> % NKO LOW TONE APOSTROPHE
+order_end
+order_start <TIFINAGH>;forward;forward;forward;forward,position
 <U2D30> <S2D30>;<BASE>;<MIN>;<U2D30> % TIFINAGH LETTER YA
 <U2D31> <S2D31>;<BASE>;<MIN>;<U2D31> % TIFINAGH LETTER YAB
 <U2D32> <S2D32>;<BASE>;<MIN>;<U2D32> % TIFINAGH LETTER YABH
@@ -69726,6 +69761,8 @@  endif
 <U2D64> <S2D64>;<BASE>;<MIN>;<U2D64> % TIFINAGH LETTER TAWELLEMET YAZ
 <U2D65> <S2D65>;<BASE>;<MIN>;<U2D65> % TIFINAGH LETTER YAZZ
 <U2D6F> <S2D6F>;<BASE>;<MIN>;<U2D6F> % TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+order_end
+order_start <Ethi>;forward;forward;forward;forward,position
 <U1200> <S1200>;<BASE>;<MIN>;<U1200> % ETHIOPIC SYLLABLE HA
 <U1201> <S1201>;<BASE>;<MIN>;<U1201> % ETHIOPIC SYLLABLE HU
 <U1202> <S1202>;<BASE>;<MIN>;<U1202> % ETHIOPIC SYLLABLE HI
@@ -70179,6 +70216,8 @@  endif
 <U2DDC> <S2DDC>;<BASE>;<MIN>;<U2DDC> % ETHIOPIC SYLLABLE GYEE
 <U2DDD> <S2DDD>;<BASE>;<MIN>;<U2DDD> % ETHIOPIC SYLLABLE GYE
 <U2DDE> <S2DDE>;<BASE>;<MIN>;<U2DDE> % ETHIOPIC SYLLABLE GYO
+order_end
+order_start <DEVANAGARI>;forward;forward;forward;forward,position
 <U0950> <S0950>;<BASE>;<MIN>;<U0950> % DEVANAGARI OM
 <UA8FD> <SA8FD>;<BASE>;<MIN>;<UA8FD> % DEVANAGARI JAIN OM
 <U0972> <S0972>;<BASE>;<MIN>;<U0972> % DEVANAGARI LETTER CANDRA A
@@ -70302,6 +70341,8 @@  endif
 <U094B> <S094B>;<BASE>;<MIN>;<U094B> % DEVANAGARI VOWEL SIGN O
 <U094C> <S094C>;<BASE>;<MIN>;<U094C> % DEVANAGARI VOWEL SIGN AU
 <U094D> <S094D>;<BASE>;<MIN>;<U094D> % DEVANAGARI SIGN VIRAMA
+order_end
+order_start <BENGALI>;forward;forward;forward;forward,position
 <U0980> <S0980>;<BASE>;<MIN>;<U0980> % BENGALI ANJI
 <U0985> <S0985>;<BASE>;<MIN>;<U0985> % BENGALI LETTER A
 <U0986> <S0986>;<BASE>;<MIN>;<U0986> % BENGALI LETTER AA
@@ -70373,6 +70414,8 @@  endif
 <U09C7_09D7> <S09CC>;<BASE>;<MIN>;<U09CC> % BENGALI VOWEL SIGN AU
 <U09CD> <S09CD>;<BASE>;<MIN>;<U09CD> % BENGALI SIGN VIRAMA
 <U09D7> <S09D7>;<BASE>;<MIN>;<U09D7> % BENGALI AU LENGTH MARK
+order_end
+order_start <GURUMUKHI>;forward;forward;forward;forward,position
 <U0A74> <S0A74>;<BASE>;<MIN>;<U0A74> % GURMUKHI EK ONKAR
 <U0A73> <S0A73>;<BASE>;<MIN>;<U0A73> % GURMUKHI URA
 <U0A09> <S0A09>;<BASE>;<MIN>;<U0A09> % GURMUKHI LETTER U
@@ -70436,6 +70479,8 @@  endif
 <U0A4B> <S0A4B>;<BASE>;<MIN>;<U0A4B> % GURMUKHI VOWEL SIGN OO
 <U0A4C> <S0A4C>;<BASE>;<MIN>;<U0A4C> % GURMUKHI VOWEL SIGN AU
 <U0A4D> <S0A4D>;<BASE>;<MIN>;<U0A4D> % GURMUKHI SIGN VIRAMA
+order_end
+order_start <GUJARATI>;forward;forward;forward;forward,position
 <U0AD0> <S0AD0>;<BASE>;<MIN>;<U0AD0> % GUJARATI OM
 <U0A85> <S0A85>;<BASE>;<MIN>;<U0A85> % GUJARATI LETTER A
 <U0A86> <S0A86>;<BASE>;<MIN>;<U0A86> % GUJARATI LETTER AA
@@ -70577,6 +70622,8 @@  endif
 <U0B4D> <S0B4D>;<BASE>;<MIN>;<U0B4D> % ORIYA SIGN VIRAMA
 <U0B56> <S0B56>;<BASE>;<MIN>;<U0B56> % ORIYA AI LENGTH MARK
 <U0B57> <S0B57>;<BASE>;<MIN>;<U0B57> % ORIYA AU LENGTH MARK
+order_end
+order_start <TAMIL>;forward;forward;forward;forward,position
 <U0BD0> <S0BD0>;<BASE>;<MIN>;<U0BD0> % TAMIL OM
 <U0B85> <S0B85>;<BASE>;<MIN>;<U0B85> % TAMIL LETTER A
 <U0B86> <S0B86>;<BASE>;<MIN>;<U0B86> % TAMIL LETTER AA
@@ -70631,6 +70678,8 @@  endif
 <U0BC6_0BD7> <S0BCC>;<BASE>;<MIN>;<U0BCC> % TAMIL VOWEL SIGN AU
 <U0BCD> <S0BCD>;<BASE>;<MIN>;<U0BCD> % TAMIL SIGN VIRAMA
 <U0BD7> <S0BD7>;<BASE>;<MIN>;<U0BD7> % TAMIL AU LENGTH MARK
+order_end
+order_start <TELUGU>;forward;forward;forward;forward,position
 <U0C05> <S0C05>;<BASE>;<MIN>;<U0C05> % TELUGU LETTER A
 <U0C06> <S0C06>;<BASE>;<MIN>;<U0C06> % TELUGU LETTER AA
 <U0C07> <S0C07>;<BASE>;<MIN>;<U0C07> % TELUGU LETTER I
@@ -70706,6 +70755,8 @@  endif
 <U0C4D> <S0C4D>;<BASE>;<MIN>;<U0C4D> % TELUGU SIGN VIRAMA
 <U0C55> <S0C55>;<BASE>;<MIN>;<U0C55> % TELUGU LENGTH MARK
 <U0C56> <S0C56>;<BASE>;<MIN>;<U0C56> % TELUGU AI LENGTH MARK
+order_end
+order_start <KANNADA>;forward;forward;forward;forward,position
 <U0C85> <S0C85>;<BASE>;<MIN>;<U0C85> % KANNADA LETTER A
 <U0C86> <S0C86>;<BASE>;<MIN>;<U0C86> % KANNADA LETTER AA
 <U0C87> <S0C87>;<BASE>;<MIN>;<U0C87> % KANNADA LETTER I
@@ -70786,6 +70837,8 @@  endif
 <U0CCD> <S0CCD>;<BASE>;<MIN>;<U0CCD> % KANNADA SIGN VIRAMA
 <U0CD5> <S0CD5>;<BASE>;<MIN>;<U0CD5> % KANNADA LENGTH MARK
 <U0CD6> <S0CD6>;<BASE>;<MIN>;<U0CD6> % KANNADA AI LENGTH MARK
+order_end
+order_start <MALAYALAM>;forward;forward;forward;forward,position
 <U0D05> <S0D05>;<BASE>;<MIN>;<U0D05> % MALAYALAM LETTER A
 <U0D06> <S0D06>;<BASE>;<MIN>;<U0D06> % MALAYALAM LETTER AA
 <U0D07> <S0D07>;<BASE>;<MIN>;<U0D07> % MALAYALAM LETTER I
@@ -70872,6 +70925,8 @@  endif
 <U0D46_0D57> <S0D4C>;<BASE>;<MIN>;<U0D4C> % MALAYALAM VOWEL SIGN AU
 <U0D57> <S0D57>;<BASE>;<MIN>;<U0D57> % MALAYALAM AU LENGTH MARK
 <U0D4D> <S0D4D>;<BASE>;<MIN>;<U0D4D> % MALAYALAM SIGN VIRAMA
+order_end
+order_start <SINHALA>;forward;forward;forward;forward,position
 <U0D85> <S0D85>;<BASE>;<MIN>;<U0D85> % SINHALA LETTER AYANNA
 <U0D86> <S0D86>;<BASE>;<MIN>;<U0D86> % SINHALA LETTER AAYANNA
 <U0D87> <S0D87>;<BASE>;<MIN>;<U0D87> % SINHALA LETTER AEYANNA
@@ -72893,6 +72948,8 @@  endif
 <UAAC2> <SAAC2>;<BASE>;<MIN>;<UAAC2> % TAI VIET TONE MAI SONG
 <UAADB> <SAADB>;<BASE>;<MIN>;<UAADB> % TAI VIET SYMBOL KON
 <UAADC> <SAADC>;<BASE>;<MIN>;<UAADC> % TAI VIET SYMBOL NUENG
+order_end
+order_start <TIBETAN>;forward;forward;forward;forward,position
 <U0F40> <S0F40>;<BASE>;<MIN>;<U0F40> % TIBETAN LETTER KA
 <U0F69> "<S0F40><S0FB5>";"<BASE><BASE>";"<MIN><MIN>";<U0F69> % TIBETAN LETTER KSSA
 <U0F90> <S0F90>;<BASE>;<MIN>;<U0F90> % TIBETAN SUBJOINED LETTER KA
@@ -73468,6 +73525,8 @@  endif
 <UA928> <SA928>;<BASE>;<MIN>;<UA928> % KAYAH LI VOWEL U
 <UA929> <SA929>;<BASE>;<MIN>;<UA929> % KAYAH LI VOWEL EE
 <UA92A> <SA92A>;<BASE>;<MIN>;<UA92A> % KAYAH LI VOWEL O
+order_end
+order_start <MYANMAR>;forward;forward;forward;forward,position
 <U1000> <S1000>;<BASE>;<MIN>;<U1000> % MYANMAR LETTER KA
 <U1075> <S1075>;<BASE>;<MIN>;<U1075> % MYANMAR LETTER SHAN KA
 <U1001> <S1001>;<BASE>;<MIN>;<U1001> % MYANMAR LETTER KHA
-- 
2.14.3