diff mbox series

[Ada] Warn for bidirectional characters

Message ID 20211110085850.GA2811262@adacore.com
State New
Headers show
Series [Ada] Warn for bidirectional characters | expand

Commit Message

Pierre-Marie de Rodat Nov. 10, 2021, 8:58 a.m. UTC
Bidirectional characters can cause security vulnerabilities, as
explained in the paper mentioned in a comment in this patch.
Therefore, we warn if such characters appear in string_literals,
character_literals, or comments.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

	* scng.adb (Check_Bidi): New procedure to give warning. Note
	that this is called only for non-ASCII characters, so should not
	be an efficiency issue.
	(Slit): Call Check_Bidi for wide characters in string_literals.
	(Minus_Case): Call Check_Bidi for wide characters in comments.
	(Char_Literal_Case): Call Check_Bidi for wide characters in
	character_literals.  Move Accumulate_Checksum down, because
	otherwise, if Err is True, the Code is uninitialized.
	* errout.ads: Make the obsolete nature of "Insertion character
	?" more prominent; one should not have to read several
	paragraphs before finding out that it's obsolete.
diff mbox series

Patch

diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads
--- a/gcc/ada/errout.ads
+++ b/gcc/ada/errout.ads
@@ -275,7 +275,7 @@  package Errout is
    --      contain subprograms to be inlined in the main program. It is also
    --      used by the Compiler_Unit_Warning pragma for similar reasons.
 
-   --    Insertion character ? (Question: warning message)
+   --    Insertion character ? (Question: warning message -- OBSOLETE)
    --      The character ? appearing anywhere in a message makes the message
    --      warning instead of a normal error message, and the text of the
    --      message will be preceded by "warning:" in the normal case. The
@@ -302,7 +302,7 @@  package Errout is
    --      clear that the continuation is part of a warning message, but it is
    --      not necessary to go through any computational effort to include it.
    --
-   --      Note: this usage is obsolete, use ?? ?*? ?$? ?x? ?.x? ?_x? to
+   --      Note: this usage is obsolete; use ?? ?*? ?$? ?x? ?.x? ?_x? to
    --      specify the string to be added when Warn_Doc_Switch is set to True.
    --      If this switch is True, then for simple ? messages it has no effect.
    --      This simple form is to ease transition and may be removed later


diff --git a/gcc/ada/scng.adb b/gcc/ada/scng.adb
--- a/gcc/ada/scng.adb
+++ b/gcc/ada/scng.adb
@@ -322,6 +322,49 @@  package body Scng is
       --  Returns True if the scan pointer is pointing to the start of a wide
       --  character sequence, does not modify the scan pointer in any case.
 
+      procedure Check_Bidi (Code : Char_Code);
+      --  Give a warning if Code is a bidirectional character, which can cause
+      --  security vulnerabilities. See the following article:
+      --
+      --  @article{boucher_trojansource_2021,
+      --      title = {Trojan {Source}: {Invisible} {Vulnerabilities}},
+      --      author = {Nicholas Boucher and Ross Anderson},
+      --      year = {2021},
+      --      journal = {Preprint},
+      --      eprint = {2111.00169},
+      --      archivePrefix = {arXiv},
+      --      primaryClass = {cs.CR},
+      --      url = {https://arxiv.org/abs/2111.00169}
+      --  }
+
+      ----------------
+      -- Check_Bidi --
+      ----------------
+
+      type Bidi_Characters is
+        (LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI);
+      Bidi_Character_Codes : constant array (Bidi_Characters) of Char_Code :=
+        (LRE => 16#202A#,
+         RLE => 16#202B#,
+         LRO => 16#202D#,
+         RLO => 16#202E#,
+         LRI => 16#2066#,
+         RLI => 16#2067#,
+         FSI => 16#2068#,
+         PDF => 16#202C#,
+         PDI => 16#2069#);
+      --  Above are the bidirectional characters, along with their Unicode code
+      --  points.
+
+      procedure Check_Bidi (Code : Char_Code) is
+      begin
+         for Bidi_Code of Bidi_Character_Codes loop
+            if Code = Bidi_Code then
+               Error_Msg ("??bidirectional wide character", Wptr);
+            end if;
+         end loop;
+      end Check_Bidi;
+
       -----------------------
       -- Double_Char_Token --
       -----------------------
@@ -1070,6 +1113,8 @@  package body Scng is
                   if Err then
                      Error_Illegal_Wide_Character;
                      Code := Get_Char_Code (' ');
+                  else
+                     Check_Bidi (Code);
                   end if;
 
                   Accumulate_Checksum (Code);
@@ -1611,11 +1656,11 @@  package body Scng is
 
                   elsif Start_Of_Wide_Character then
                      declare
-                        Wptr : constant Source_Ptr := Scan_Ptr;
                         Code : Char_Code;
                         Err  : Boolean;
 
                      begin
+                        Wptr := Scan_Ptr;
                         Scan_Wide (Source, Scan_Ptr, Code, Err);
 
                         --  If not well formed wide character, then just skip
@@ -1629,6 +1674,8 @@  package body Scng is
                         elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
                            Scan_Ptr := Wptr;
                            exit;
+                        else
+                           Check_Bidi (Code);
                         end if;
                      end;
 
@@ -1736,7 +1783,6 @@  package body Scng is
                if Start_Of_Wide_Character then
                   Wptr := Scan_Ptr;
                   Scan_Wide (Source, Scan_Ptr, Code, Err);
-                  Accumulate_Checksum (Code);
 
                   if Err then
                      Error_Illegal_Wide_Character;
@@ -1752,8 +1798,12 @@  package body Scng is
                      Error_Msg -- CODEFIX
                        ("(Ada 2005) non-graphic character not permitted " &
                         "in character literal", Wptr);
+                  else
+                     Check_Bidi (Code);
                   end if;
 
+                  Accumulate_Checksum (Code);
+
                   if Source (Scan_Ptr) /= ''' then
                         Error_Msg_S ("missing apostrophe");
                   else