[Ada] Implement Ada.Strings.UTF_Encoding

This patch provides the first full implementation of the new Ada
2012 package Ada.Strings.UTF_Encoding, which is also available in
Ada 2005 mode (but not Ada 95 mode, since Wide_Wide_Character is
required). The package is a full implementation of AI05-137-1, see
http://www.ada-auth.org/cgi-bin/cvsweb.cgi/ai05s/ai05-0137-1.txt,
with some additions. Full documentation is in the a-stuten.ads file.

The following is a test of all capabilities with output:

pragma Ada_05;
with Ada.Strings.UTF_Encoding;
use  Ada.Strings.UTF_Encoding;
with Ada.Text_IO; use Ada.Text_IO;

procedure UTF_Test is
   subtype WC is Wide_Character;
   subtype WS is Wide_String;
   subtype WWC is Wide_Wide_Character;
   subtype WWS is Wide_Wide_String;

   procedure Test (Test_Name : String; S1, S2 : WS);
   procedure Test (Test_Name : String; S1, S2 : WWS);
   --  S1 should equal S2 for given test name

   procedure Test (Test_Name : String; S1, S2 : WS) is
   begin
      if S1 = S2 then
         Put_Line ("Test " & Test_Name & " passed");
      else
         Put_Line ("Test " & Test_Name & " failed");
         for J in S1'Range loop
            Put_Line
              ("  S1 ("
               & J'Img
               & " ) = Wide_Character'Val ("
               & Integer'Image (WC'Pos (S1 (J)))
               & " )");
         end loop;

         for J in S2'Range loop
            Put_Line
              ("  S2 ("
               & J'Img
               & " ) = Wide_Character'Val ("
               & Integer'Image (WC'Pos (S2 (J)))
               & " )");
         end loop;
      end if;
   end Test;

   procedure Test (Test_Name : String; S1, S2 : WWS) is
   begin
      if S1 = S2 then
         Put_Line ("Test " & Test_Name & " passed");
      else
         Put_Line ("Test " & Test_Name & " failed");
         for J in S1'Range loop
            Put_Line
              ("  S1 ("
               & J'Img
               & " ) = Wide_Wide_Character'Val ("
               & Integer'Image (WWC'Pos (S1 (J)))
               & " )");
         end loop;

         for J in S2'Range loop
            Put_Line
              ("  S2 ("
               & J'Img
               & " ) = Wide_Wide_Character'Val ("
               & Integer'Image (WWC'Pos (S2 (J)))
               & " )");
         end loop;
      end if;
   end Test;

begin
   --  Test series A: Wide_String in UTF_8

   declare
      T0 : WS := (
             WC'Val (16#70#),
             WC'Val (16#700#),
             WC'Val (16#7000#));
      T1 : String := Encode (T0, UTF_8);
      T2 : String := BOM_8 & T1;
      T3 : WS := Decode (T1, UTF_8);
      T4 : WS := Decode (T2, UTF_8);
      T5 : WS := Decode (T2, Encoding (T2));
   begin
      Test ("A1", T0, T3);
      Test ("A2", T0, T4);
      Test ("A3", T0, T5);
   end;

   --  Test series B: Wide_Wide_String in UTF_8

   declare
      T0 : WWS := (
             WWC'Val (16#70#),
             WWC'Val (16#700#),
             WWC'Val (16#7000#),
             WWC'Val (16#01_7000#));
      T1 : String := Encode (T0, UTF_8);
      T2 : String := BOM_8 & T1;
      T3 : WWS := Decode (T1, UTF_8);
      T4 : WWS := Decode (T2, UTF_8);
      T5 : WWS := Decode (T2, Encoding (T2));
   begin
      Test ("B1", T0, T3);
      Test ("B2", T0, T4);
      Test ("B3", T0, T5);
   end;

   --  Test series C: Wide_String in UTF_16LE

   declare
      T0 : WS := (
             WC'Val (16#8900#),
             WC'Val (16#E900#));
      T1 : String := Encode (T0, UTF_16LE);
      T2 : String := BOM_16LE & T1;
      T3 : WS := Decode (T1, UTF_16LE);
      T4 : WS := Decode (T2, UTF_16LE);
      T5 : WS := Decode (T2, Encoding (T2));
   begin
      Test ("C1", T0, T3);
      Test ("C2", T0, T4);
      Test ("C3", T0, T5);
   end;

   --  Test series D: Wide_String in UTF_16BE

   declare
      T0 : WS := (
             WC'Val (16#8900#),
             WC'Val (16#E900#));
      T1 : String := Encode (T0, UTF_16BE);
      T2 : String := BOM_16BE & T1;
      T3 : WS := Decode (T1, UTF_16BE);
      T4 : WS := Decode (T2, UTF_16BE);
      T5 : WS := Decode (T2, Encoding (T2));
   begin
      Test ("D1", T0, T3);
      Test ("D2", T0, T4);
      Test ("D3", T0, T5);
   end;

   --  Test series E: Wide_Wide_String in UTF_16BE

   declare
      T0 : WWS := (
             WWC'Val (16#00_8900#),
             WWC'Val (16#00_E900#),
             WWC'Val (16#07_0000#));
      T1 : String := Encode (T0, UTF_16BE);
      T2 : String := BOM_16BE & T1;
      T3 : WWS := Decode (T1, UTF_16BE);
      T4 : WWS := Decode (T2, UTF_16BE);
      T5 : WWS := Decode (T2, Encoding (T2));
   begin
      Test ("E1", T0, T3);
      Test ("E2", T0, T4);
      Test ("E3", T0, T5);
   end;

   --  Test series F: Wide_Wide_String in UTF_16LE

   declare
      T0 : WWS := (
             WWC'Val (16#00_8900#),
             WWC'Val (16#00_E900#),
             WWC'Val (16#07_0000#));
      T1 : String := Encode (T0, UTF_16LE);
      T2 : String := BOM_16LE & T1;
      T3 : WWS := Decode (T1, UTF_16LE);
      T4 : WWS := Decode (T2, UTF_16LE);
      T5 : WWS := Decode (T2, Encoding (T2));
   begin
      Test ("F1", T0, T3);
      Test ("F2", T0, T4);
      Test ("F3", T0, T5);
   end;

   --  Test series G: Wide_String in UTF_16

   declare
      T0 : WS := (
             WC'Val (16#00_8900#),
             WC'Val (16#00_E900#));
      T1 : WS := Encode (T0, UTF_16);
      T2 : WS := BOM_16 & T1;
      T3 : WS := Decode (T1, UTF_16);
      T4 : WS := Decode (T2, UTF_16);
      T5 : WS := Decode (T2, Encoding (T2));
   begin
      Test ("G1", T0, T3);
      Test ("G2", T0, T4);
      Test ("G3", T0, T5);
   end;

   --  Test series H:  Wide_Wide_String in UTF_16

   declare
      T0 : WWS := (
             WWC'Val (16#00_8900#),
             WWC'Val (16#00_E900#),
             WWC'Val (16#07_0000#));
      T1 : WS := Encode (T0, UTF_16);
      T2 : WS := BOM_16 & T1;
      T3 : WWS := Decode (T1, UTF_16);
      T4 : WWS := Decode (T2, UTF_16);
      T5 : WWS := Decode (T2, Encoding (T2));
   begin
      Test ("H1", T0, T3);
      Test ("H2", T0, T4);
      Test ("H3", T0, T5);
   end;

   --  Test series I: Invalid codes in Wide_String

   declare
      T0 : WS := (
             WC'Val (16#D900#),
             WC'Val (16#E900#));
   begin
      begin
         declare
            T1 : String := Encode (T0, UTF_16LE);
         begin
            null;
         end;
         Put_Line ("Test I1 failed");
      exception
         when Constraint_Error =>
            Put_Line ("Test I1 passed");
      end;

      begin
         declare
            T1 : String := Encode (T0, UTF_16BE);
         begin
            null;
         end;
         Put_Line ("Test I2 failed");
      exception
         when Constraint_Error =>
            Put_Line ("Test I2 passed");
      end;

      begin
         declare
            T1 : Wide_String := Encode (T0, UTF_16);
         begin
            null;
         end;
         Put_Line ("Test I3 failed");
      exception
         when Constraint_Error =>
            Put_Line ("Test I3 passed");
      end;
   end;

   --  Test series J: Invalid codes in Wide_Wide_String

   declare
      T0 : WWS := (
             WWC'Val (16#00_D900#),
             WWC'Val (16#00_E900#));
   begin
      begin
         declare
            T1 : String := Encode (T0, UTF_16LE);
         begin
            null;
         end;
         Put_Line ("Test J1 failed");
      exception
         when Constraint_Error =>
            Put_Line ("Test J1 passed");
      end;

      begin
         declare
            T1 : String := Encode (T0, UTF_16BE);
         begin
            null;
         end;
         Put_Line ("Test J2 failed");
      exception
         when Constraint_Error =>
            Put_Line ("Test J2 passed");
      end;

      begin
         declare
            T1 : Wide_String := Encode (T0, UTF_16);
         begin
            null;
         end;
         Put_Line ("Test J3 failed");
      exception
         when Constraint_Error =>
            Put_Line ("Test J3 passed");
      end;
   end;

   --  Test series K: Invalid UTF-8 codes

   declare
      T0 : String := (
             Character'Val (16#80#),
             Character'Val (16#20#));
      T1 : String := (
             Character'Val (16#C1#),
             Character'Val (16#05#));

   begin
      begin
         declare
            T2 : Wide_String := Decode (T0, UTF_8);
         begin
            null;
         end;
         Put_Line ("Test K1 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test K1 passed");
      end;

      begin
         declare
            T2 : Wide_Wide_String := Decode (T0, UTF_8);
         begin
            null;
         end;
         Put_Line ("Test K2 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test K2 passed");
      end;

       begin
         declare
            T2 : Wide_String := Decode (T1, UTF_8);
         begin
            null;
         end;
         Put_Line ("Test K3 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test K3 passed");
      end;

      begin
         declare
            T2 : Wide_Wide_String := Decode (T0, UTF_8);
         begin
            null;
         end;
         Put_Line ("Test K4 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test K4 passed");
      end;
   end;

   --  Test series L: Invalid UTF-8 codes

   declare
      T0 : Wide_String := (
             WC'Val (16#DC00#),
             WC'Val (16#007F#));
      T1 : Wide_String := (
             WC'Val (16#D801#),
             WC'Val (16#D801#));

   begin
      begin
         declare
            T2 : Wide_String := Decode (T0, UTF_16);
         begin
            null;
         end;
         Put_Line ("Test L1 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test L1 passed");
      end;

      begin
         declare
            T2 : Wide_Wide_String := Decode (T0, UTF_16);
         begin
            null;
         end;
         Put_Line ("Test L2 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test L2 passed");
      end;

       begin
         declare
            T2 : Wide_String := Decode (T1, UTF_16);
         begin
            null;
         end;
         Put_Line ("Test L3 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test L3 passed");
      end;

      begin
         declare
            T2 : Wide_Wide_String := Decode (T0, UTF_16);
         begin
            null;
         end;
         Put_Line ("Test L4 failed");
      exception
         when Encoding_Error =>
            Put_Line ("Test L4 passed");
      end;
   end;
end UTF_Test;

And the expected output is:

Test A1 passed
Test A2 passed
Test A3 passed
Test B1 passed
Test B2 passed
Test B3 passed
Test C1 passed
Test C2 passed
Test C3 passed
Test D1 passed
Test D2 passed
Test D3 passed
Test E1 passed
Test E2 passed
Test E3 passed
Test F1 passed
Test F2 passed
Test F3 passed
Test G1 passed
Test G2 passed
Test G3 passed
Test H1 passed
Test H2 passed
Test H3 passed
Test I1 passed
Test I2 passed
Test I3 passed
Test J1 passed
Test J2 passed
Test J3 passed
Test K1 passed
Test K2 passed
Test K3 passed
Test K4 passed
Test L1 passed
Test L2 passed
Test L3 passed
Test L4 passed

Tested on x86_64-pc-linux-gnu, committed on trunk

2010-06-23  Robert Dewar  <dewar@adacore.com>

	* a-stuten.ads, a-stuten.adb: New files.
	* impunit.adb: Add engtry for Ada.Strings.UTF_Encoding (a-stuten.ads)
	* Makefile.rtl: Add entry for a-stuten (Ada.Strings.UTF_Encoding)

[Ada] Implement Ada.Strings.UTF_Encoding

Commit Message

Patch