===================================================================
@@ -7,7 +7,7 @@
-- B o d y --
-- --
-- Copyright (C) 1986 by University of Toronto. --
+-- Copyright (C) 1999-2014, AdaCore --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
@@ -410,10 +410,13 @@
procedure Parse
(Parenthesized : Boolean;
+ Capturing : Boolean;
Flags : out Expression_Flags;
IP : out Pointer);
-- Parse regular expression, i.e. main body or parenthesized thing
-- Caller must absorb opening parenthesis.
+ -- Capturing should be set to True when we have an open parenthesis
+ -- from which we want the user to extract text.
procedure Parse_Branch
(Flags : out Expression_Flags;
@@ -831,9 +834,10 @@
-- the branches to what follows makes it hard to avoid.
procedure Parse
- (Parenthesized : Boolean;
- Flags : out Expression_Flags;
- IP : out Pointer)
+ (Parenthesized : Boolean;
+ Capturing : Boolean;
+ Flags : out Expression_Flags;
+ IP : out Pointer)
is
E : String renames Expression;
Br, Br2 : Pointer;
@@ -847,7 +851,7 @@
-- Make an OPEN node, if parenthesized
- if Parenthesized then
+ if Parenthesized and then Capturing then
if Matcher.Paren_Count > Max_Paren_Count then
Fail ("too many ()");
end if;
@@ -856,7 +860,6 @@
Matcher.Paren_Count := Matcher.Paren_Count + 1;
IP := Emit_Node (OPEN);
Emit (Character'Val (Par_No));
-
else
IP := 0;
Par_No := 0;
@@ -913,14 +916,19 @@
-- Make a closing node, and hook it on the end
if Parenthesized then
- Ender := Emit_Node (CLOSE);
- Emit (Character'Val (Par_No));
+ if Capturing then
+ Ender := Emit_Node (CLOSE);
+ Emit (Character'Val (Par_No));
+ Link_Tail (IP, Ender);
+ else
+ -- need to keep looking after the closing parenthesis
+ null;
+ end if;
else
Ender := Emit_Node (EOP);
+ Link_Tail (IP, Ender);
end if;
- Link_Tail (IP, Ender);
-
if Have_Branch and then Emit_Ptr <= PM.Size + 1 then
-- Hook the tails of the branches to the closing node
@@ -945,7 +953,7 @@
elsif Parse_Pos <= Parse_End then
if E (Parse_Pos) = ')' then
- Fail ("unmatched ()");
+ Fail ("unmatched ')'");
else
Fail ("junk on end"); -- "Can't happen"
end if;
@@ -1003,16 +1011,24 @@
New_Flags : Expression_Flags;
begin
- Parse (True, New_Flags, IP);
-
- if IP = 0 then
- return;
+ if Parse_Pos <= Parse_End - 1
+ and then Expression (Parse_Pos) = '?'
+ and then Expression (Parse_Pos + 1) = ':'
+ then
+ Parse_Pos := Parse_Pos + 2;
+ -- non-capturing parenthesis
+ Parse (True, False, New_Flags, IP);
+ else
+ -- capturing parenthesis
+ Parse (True, True, New_Flags, IP);
+ Expr_Flags.Has_Width :=
+ Expr_Flags.Has_Width or else New_Flags.Has_Width;
+ Expr_Flags.SP_Start :=
+ Expr_Flags.SP_Start or else New_Flags.SP_Start;
+ if IP = 0 then
+ return;
+ end if;
end if;
-
- Expr_Flags.Has_Width :=
- Expr_Flags.Has_Width or else New_Flags.Has_Width;
- Expr_Flags.SP_Start :=
- Expr_Flags.SP_Start or else New_Flags.SP_Start;
end;
when '|' | ASCII.LF | ')' =>
@@ -1971,7 +1987,7 @@
-- Start of processing for Compile
begin
- Parse (False, Expr_Flags, Result);
+ Parse (False, False, Expr_Flags, Result);
if Result = 0 then
Fail ("Couldn't compile expression");
===================================================================
@@ -7,7 +7,7 @@
-- S p e c --
-- --
-- Copyright (C) 1986 by University of Toronto. --
+-- Copyright (C) 1996-2014, AdaCore --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
@@ -78,8 +78,10 @@
-- ::= [^ range range ...] -- matches any character not listed
-- ::= . -- matches any single character
-- -- except newlines
- -- ::= ( expr ) -- parens used for grouping
- -- ::= \ num -- reference to num-th parenthesis
+ -- ::= ( expr ) -- parentheses used for grouping
+ -- ::= (?: expr ) -- non-capturing parentheses
+ -- ::= \ num -- reference to num-th capturing
+ -- parenthesis
-- range ::= char - char -- matches chars in given range
-- ::= nchr
@@ -345,6 +347,9 @@
-- N'th parenthesized subexpressions; Matches (0) is for the whole
-- expression.
--
+ -- Non-capturing parentheses (introduced with (?:...)) cannot be
+ -- retrieved and do not count in the match array index.
+ --
-- For instance, if your regular expression is: "a((b*)c+)(d+)", then
-- 12 3
-- Matches (0) is for "a((b*)c+)(d+)" (the entire expression)