1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . R E G E X P --
9 -- Copyright (C) 1999-2010, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 with Ada.Unchecked_Deallocation;
34 with System.Case_Util;
36 package body System.Regexp is
38 Open_Paren : constant Character := '(';
39 Close_Paren : constant Character := ')';
40 Open_Bracket : constant Character := '[';
41 Close_Bracket : constant Character := ']';
43 type State_Index is new Natural;
44 type Column_Index is new Natural;
46 type Regexp_Array is array
47 (State_Index range <>, Column_Index range <>) of State_Index;
48 -- First index is for the state number
49 -- Second index is for the character type
50 -- Contents is the new State
52 type Regexp_Array_Access is access Regexp_Array;
53 -- Use this type through the functions Set below, so that it
54 -- can grow dynamically depending on the needs.
56 type Mapping is array (Character'Range) of Column_Index;
57 -- Mapping between characters and column in the Regexp_Array
59 type Boolean_Array is array (State_Index range <>) of Boolean;
62 (Alphabet_Size : Column_Index;
63 Num_States : State_Index) is
66 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
67 Is_Final : Boolean_Array (1 .. Num_States);
68 Case_Sensitive : Boolean;
70 -- Deterministic finite-state machine
72 -----------------------
73 -- Local Subprograms --
74 -----------------------
77 (Table : in out Regexp_Array_Access;
79 Column : Column_Index;
81 -- Sets a value in the table. If the table is too small, reallocate it
82 -- dynamically so that (State, Column) is a valid index in it.
85 (Table : Regexp_Array_Access;
87 Column : Column_Index)
89 -- Returns the value in the table at (State, Column).
90 -- If this index does not exist in the table, returns 0
92 procedure Free is new Ada.Unchecked_Deallocation
93 (Regexp_Array, Regexp_Array_Access);
99 procedure Adjust (R : in out Regexp) is
103 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
104 Num_States => R.R.Num_States);
115 Glob : Boolean := False;
116 Case_Sensitive : Boolean := True)
119 S : String := Pattern;
120 -- The pattern which is really compiled (when the pattern is case
121 -- insensitive, we convert this string to lower-cases
123 Map : Mapping := (others => 0);
124 -- Mapping between characters and columns in the tables
126 Alphabet_Size : Column_Index := 0;
127 -- Number of significant characters in the regular expression.
128 -- This total does not include special operators, such as *, (, ...
130 procedure Check_Well_Formed_Pattern;
131 -- Check that the pattern to compile is well-formed, so that subsequent
132 -- code can rely on this without performing each time the checks to
133 -- avoid accessing the pattern outside its bounds. However, not all
134 -- well-formedness rules are checked. In particular, rules about special
135 -- characters not being treated as regular characters are not checked.
137 procedure Create_Mapping;
138 -- Creates a mapping between characters in the regexp and columns
139 -- in the tables representing the regexp. Test that the regexp is
140 -- well-formed Modifies Alphabet_Size and Map
142 procedure Create_Primary_Table
143 (Table : out Regexp_Array_Access;
144 Num_States : out State_Index;
145 Start_State : out State_Index;
146 End_State : out State_Index);
147 -- Creates the first version of the regexp (this is a non deterministic
148 -- finite state machine, which is unadapted for a fast pattern
149 -- matching algorithm). We use a recursive algorithm to process the
150 -- parenthesis sub-expressions.
152 -- Table : at the end of the procedure : Column 0 is for any character
153 -- ('.') and the last columns are for no character (closure)
154 -- Num_States is set to the number of states in the table
155 -- Start_State is the number of the starting state in the regexp
156 -- End_State is the number of the final state when the regexp matches
158 procedure Create_Primary_Table_Glob
159 (Table : out Regexp_Array_Access;
160 Num_States : out State_Index;
161 Start_State : out State_Index;
162 End_State : out State_Index);
163 -- Same function as above, but it deals with the second possible
164 -- grammar for 'globbing pattern', which is a kind of subset of the
165 -- whole regular expression grammar.
167 function Create_Secondary_Table
168 (First_Table : Regexp_Array_Access;
169 Num_States : State_Index;
170 Start_State : State_Index;
171 End_State : State_Index)
173 -- Creates the definitive table representing the regular expression
174 -- This is actually a transformation of the primary table First_Table,
175 -- where every state is grouped with the states in its 'no-character'
176 -- columns. The transitions between the new states are then recalculated
177 -- and if necessary some new states are created.
179 -- Note that the resulting finite-state machine is not optimized in
180 -- terms of the number of states : it would be more time-consuming to
181 -- add a third pass to reduce the number of states in the machine, with
182 -- no speed improvement...
184 procedure Raise_Exception (M : String; Index : Integer);
185 pragma No_Return (Raise_Exception);
186 -- Raise an exception, indicating an error at character Index in S
188 -------------------------------
189 -- Check_Well_Formed_Pattern --
190 -------------------------------
192 procedure Check_Well_Formed_Pattern is
195 Past_Elmt : Boolean := False;
196 -- Set to True everywhere an elmt has been parsed, if Glob=False,
197 -- meaning there can be now an occurrence of '*', '+' and '?'.
199 Past_Term : Boolean := False;
200 -- Set to True everywhere a term has been parsed, if Glob=False,
201 -- meaning there can be now an occurrence of '|'.
203 Parenthesis_Level : Integer := 0;
204 Curly_Level : Integer := 0;
206 Last_Open : Integer := S'First - 1;
207 -- The last occurrence of an opening parenthesis, if Glob=False,
208 -- or the last occurrence of an opening curly brace, if Glob=True.
210 procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
211 -- If no more characters are raised, call Raise_Exception
213 --------------------------------------
214 -- Raise_Exception_If_No_More_Chars --
215 --------------------------------------
217 procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
219 if J + K > S'Last then
220 Raise_Exception ("Ill-formed pattern while parsing", J);
222 end Raise_Exception_If_No_More_Chars;
224 -- Start of processing for Check_Well_Formed_Pattern
228 while J <= S'Last loop
232 Raise_Exception_If_No_More_Chars;
237 Raise_Exception_If_No_More_Chars;
241 -- The first character never has a special meaning
243 if S (J) = ']' or else S (J) = '-' then
245 Raise_Exception_If_No_More_Chars;
248 -- The set of characters cannot be empty
252 ("Set of characters cannot be empty in regular "
257 Possible_Range_Start : Boolean := True;
258 -- Set True everywhere a range character '-' can occur
262 exit when S (J) = Close_Bracket;
264 -- The current character should be followed by a
267 Raise_Exception_If_No_More_Chars (1);
270 and then S (J + 1) /= Close_Bracket
272 if not Possible_Range_Start then
274 ("No mix of ranges is allowed in "
275 & "regular expression", J);
279 Raise_Exception_If_No_More_Chars;
281 -- Range cannot be followed by '-' character,
282 -- except as last character in the set.
284 Possible_Range_Start := False;
287 Possible_Range_Start := True;
292 Raise_Exception_If_No_More_Chars;
299 -- A closing bracket can end an elmt or term
304 when Close_Bracket =>
306 -- A close bracket must follow a open_bracket, and cannot be
307 -- found alone on the line.
310 ("Incorrect character ']' in regular expression", J);
316 -- Any character can be an elmt or a term
322 -- \ not allowed at the end of the regexp
325 ("Incorrect character '\' in regular expression", J);
330 Parenthesis_Level := Parenthesis_Level + 1;
333 -- An open parenthesis does not end an elmt or term
341 Parenthesis_Level := Parenthesis_Level - 1;
343 if Parenthesis_Level < 0 then
345 ("')' is not associated with '(' in regular "
349 if J = Last_Open + 1 then
351 ("Empty parentheses not allowed in regular "
355 if not Past_Term then
357 ("Closing parenthesis not allowed here in regular "
361 -- A closing parenthesis can end an elmt or term
369 Curly_Level := Curly_Level + 1;
373 -- Any character can be an elmt or a term
379 -- No need to check for ',' as the code always accepts them
383 Curly_Level := Curly_Level - 1;
385 if Curly_Level < 0 then
387 ("'}' is not associated with '{' in regular "
391 if J = Last_Open + 1 then
393 ("Empty curly braces not allowed in regular "
398 -- Any character can be an elmt or a term
404 when '*' | '?' | '+' =>
407 -- These operators must apply to an elmt sub-expression,
408 -- and cannot be found if one has not just been parsed.
410 if not Past_Elmt then
412 ("'*', '+' and '?' operators must be "
413 & "applied to an element in regular expression", J);
423 -- This operator must apply to a term sub-expression,
424 -- and cannot be found if one has not just been parsed.
426 if not Past_Term then
428 ("'|' operator must be "
429 & "applied to a term in regular expression", J);
439 -- Any character can be an elmt or a term
449 -- A closing parenthesis must follow an open parenthesis
451 if Parenthesis_Level /= 0 then
453 ("'(' must always be associated with a ')'", J);
456 -- A closing curly brace must follow an open curly brace
458 if Curly_Level /= 0 then
460 ("'{' must always be associated with a '}'", J);
462 end Check_Well_Formed_Pattern;
468 procedure Create_Mapping is
470 procedure Add_In_Map (C : Character);
471 -- Add a character in the mapping, if it is not already defined
477 procedure Add_In_Map (C : Character) is
480 Alphabet_Size := Alphabet_Size + 1;
481 Map (C) := Alphabet_Size;
485 J : Integer := S'First;
486 Parenthesis_Level : Integer := 0;
487 Curly_Level : Integer := 0;
488 Last_Open : Integer := S'First - 1;
490 -- Start of processing for Create_Mapping
493 while J <= S'Last loop
502 if S (J) = ']' or else S (J) = '-' then
506 -- The first character never has a special meaning
511 ("Ran out of characters while parsing ", J);
514 exit when S (J) = Close_Bracket;
517 and then S (J + 1) /= Close_Bracket
520 Start : constant Integer := J - 1;
529 for Char in S (Start) .. S (J) loop
544 -- A close bracket must follow a open_bracket,
545 -- and cannot be found alone on the line
547 when Close_Bracket =>
549 ("Incorrect character ']' in regular expression", J);
557 -- \ not allowed at the end of the regexp
560 ("Incorrect character '\' in regular expression", J);
565 Parenthesis_Level := Parenthesis_Level + 1;
568 Add_In_Map (Open_Paren);
573 Parenthesis_Level := Parenthesis_Level - 1;
575 if Parenthesis_Level < 0 then
577 ("')' is not associated with '(' in regular "
581 if J = Last_Open + 1 then
583 ("Empty parenthesis not allowed in regular "
588 Add_In_Map (Close_Paren);
600 Curly_Level := Curly_Level + 1;
607 Curly_Level := Curly_Level - 1;
614 ("'*', '+', '?' and '|' operators cannot be in "
615 & "first position in regular expression", J);
623 -- These operators must apply to a sub-expression,
624 -- and cannot be found at the beginning of the line
627 ("'*', '+', '?' and '|' operators cannot be in "
628 & "first position in regular expression", J);
642 -- A closing parenthesis must follow an open parenthesis
644 if Parenthesis_Level /= 0 then
646 ("'(' must always be associated with a ')'", J);
649 if Curly_Level /= 0 then
651 ("'{' must always be associated with a '}'", J);
655 --------------------------
656 -- Create_Primary_Table --
657 --------------------------
659 procedure Create_Primary_Table
660 (Table : out Regexp_Array_Access;
661 Num_States : out State_Index;
662 Start_State : out State_Index;
663 End_State : out State_Index)
665 Empty_Char : constant Column_Index := Alphabet_Size + 1;
667 Current_State : State_Index := 0;
668 -- Index of the last created state
670 procedure Add_Empty_Char
671 (State : State_Index;
672 To_State : State_Index);
673 -- Add a empty-character transition from State to To_State
675 procedure Create_Repetition
676 (Repetition : Character;
677 Start_Prev : State_Index;
678 End_Prev : State_Index;
679 New_Start : out State_Index;
680 New_End : in out State_Index);
681 -- Create the table in case we have a '*', '+' or '?'.
682 -- Start_Prev .. End_Prev should indicate respectively the start and
683 -- end index of the previous expression, to which '*', '+' or '?' is
686 procedure Create_Simple
687 (Start_Index : Integer;
689 Start_State : out State_Index;
690 End_State : out State_Index);
691 -- Fill the table for the regexp Simple.
692 -- This is the recursive procedure called to handle () expressions
693 -- If End_State = 0, then the call to Create_Simple creates an
694 -- independent regexp, not a concatenation
695 -- Start_Index .. End_Index is the starting index in the string S.
697 -- Warning: it may look like we are creating too many empty-string
698 -- transitions, but they are needed to get the correct regexp.
699 -- The table is filled as follow ( s means start-state, e means
702 -- regexp state_num | a b * empty_string
703 -- ------- ------------------------------
707 -- ab 1 (s) | 2 - - -
724 -- (a) 1 (s) | 2 - - -
740 function Next_Sub_Expression
741 (Start_Index : Integer;
744 -- Returns the index of the last character of the next sub-expression
745 -- in Simple. Index cannot be greater than End_Index.
751 procedure Add_Empty_Char
752 (State : State_Index;
753 To_State : State_Index)
755 J : Column_Index := Empty_Char;
758 while Get (Table, State, J) /= 0 loop
762 Set (Table, State, J, To_State);
765 -----------------------
766 -- Create_Repetition --
767 -----------------------
769 procedure Create_Repetition
770 (Repetition : Character;
771 Start_Prev : State_Index;
772 End_Prev : State_Index;
773 New_Start : out State_Index;
774 New_End : in out State_Index)
777 New_Start := Current_State + 1;
780 Add_Empty_Char (New_End, New_Start);
783 Current_State := Current_State + 2;
784 New_End := Current_State;
786 Add_Empty_Char (End_Prev, New_End);
787 Add_Empty_Char (New_Start, Start_Prev);
789 if Repetition /= '+' then
790 Add_Empty_Char (New_Start, New_End);
793 if Repetition /= '?' then
794 Add_Empty_Char (New_End, New_Start);
796 end Create_Repetition;
802 procedure Create_Simple
803 (Start_Index : Integer;
805 Start_State : out State_Index;
806 End_State : out State_Index)
808 J : Integer := Start_Index;
809 Last_Start : State_Index := 0;
814 while J <= End_Index loop
818 J_Start : constant Integer := J + 1;
819 Next_Start : State_Index;
820 Next_End : State_Index;
823 J := Next_Sub_Expression (J, End_Index);
824 Create_Simple (J_Start, J - 1, Next_Start, Next_End);
827 and then (S (J + 1) = '*' or else
828 S (J + 1) = '+' or else
840 Last_Start := Next_Start;
842 if End_State /= 0 then
843 Add_Empty_Char (End_State, Last_Start);
846 End_State := Next_End;
852 Start_Prev : constant State_Index := Start_State;
853 End_Prev : constant State_Index := End_State;
854 Start_J : constant Integer := J + 1;
855 Start_Next : State_Index := 0;
856 End_Next : State_Index := 0;
859 J := Next_Sub_Expression (J, End_Index);
861 -- Create a new state for the start of the alternative
863 Current_State := Current_State + 1;
864 Last_Start := Current_State;
865 Start_State := Last_Start;
867 -- Create the tree for the second part of alternative
869 Create_Simple (Start_J, J, Start_Next, End_Next);
871 -- Create the end state
873 Add_Empty_Char (Last_Start, Start_Next);
874 Add_Empty_Char (Last_Start, Start_Prev);
875 Current_State := Current_State + 1;
876 End_State := Current_State;
877 Add_Empty_Char (End_Prev, End_State);
878 Add_Empty_Char (End_Next, End_State);
882 Current_State := Current_State + 1;
885 Next_State : State_Index := Current_State + 1;
895 for Column in 0 .. Alphabet_Size loop
896 Set (Table, Current_State, Column,
897 Value => Current_State + 1);
901 -- Automatically add the first character
903 if S (J) = '-' or else S (J) = ']' then
904 Set (Table, Current_State, Map (S (J)),
905 Value => Next_State);
909 -- Loop till closing bracket found
912 exit when S (J) = Close_Bracket;
915 and then S (J + 1) /= ']'
918 Start : constant Integer := J - 1;
927 for Char in S (Start) .. S (J) loop
928 Set (Table, Current_State, Map (Char),
929 Value => Next_State);
938 Set (Table, Current_State, Map (S (J)),
939 Value => Next_State);
945 Current_State := Current_State + 1;
947 -- If the next symbol is a special symbol
950 and then (S (J + 1) = '*' or else
951 S (J + 1) = '+' or else
963 Last_Start := Current_State - 1;
965 if End_State /= 0 then
966 Add_Empty_Char (End_State, Last_Start);
969 End_State := Current_State;
972 when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
974 ("Incorrect character in regular expression :", J);
977 Current_State := Current_State + 1;
979 -- Create the state for the symbol S (J)
982 for K in 0 .. Alphabet_Size loop
983 Set (Table, Current_State, K,
984 Value => Current_State + 1);
992 Set (Table, Current_State, Map (S (J)),
993 Value => Current_State + 1);
996 Current_State := Current_State + 1;
998 -- If the next symbol is a special symbol
1001 and then (S (J + 1) = '*' or else
1002 S (J + 1) = '+' or else
1014 Last_Start := Current_State - 1;
1016 if End_State /= 0 then
1017 Add_Empty_Char (End_State, Last_Start);
1020 End_State := Current_State;
1025 if Start_State = 0 then
1026 Start_State := Last_Start;
1033 -------------------------
1034 -- Next_Sub_Expression --
1035 -------------------------
1037 function Next_Sub_Expression
1038 (Start_Index : Integer;
1039 End_Index : Integer)
1042 J : Integer := Start_Index;
1043 Start_On_Alter : Boolean := False;
1047 Start_On_Alter := True;
1051 exit when J = End_Index;
1058 when Open_Bracket =>
1061 exit when S (J) = Close_Bracket;
1069 J := Next_Sub_Expression (J, End_Index);
1075 if Start_On_Alter then
1085 end Next_Sub_Expression;
1087 -- Start of Create_Primary_Table
1090 Table.all := (others => (others => 0));
1091 Create_Simple (S'First, S'Last, Start_State, End_State);
1092 Num_States := Current_State;
1093 end Create_Primary_Table;
1095 -------------------------------
1096 -- Create_Primary_Table_Glob --
1097 -------------------------------
1099 procedure Create_Primary_Table_Glob
1100 (Table : out Regexp_Array_Access;
1101 Num_States : out State_Index;
1102 Start_State : out State_Index;
1103 End_State : out State_Index)
1105 Empty_Char : constant Column_Index := Alphabet_Size + 1;
1107 Current_State : State_Index := 0;
1108 -- Index of the last created state
1110 procedure Add_Empty_Char
1111 (State : State_Index;
1112 To_State : State_Index);
1113 -- Add a empty-character transition from State to To_State
1115 procedure Create_Simple
1116 (Start_Index : Integer;
1117 End_Index : Integer;
1118 Start_State : out State_Index;
1119 End_State : out State_Index);
1120 -- Fill the table for the S (Start_Index .. End_Index).
1121 -- This is the recursive procedure called to handle () expressions
1123 --------------------
1124 -- Add_Empty_Char --
1125 --------------------
1127 procedure Add_Empty_Char
1128 (State : State_Index;
1129 To_State : State_Index)
1131 J : Column_Index := Empty_Char;
1134 while Get (Table, State, J) /= 0 loop
1138 Set (Table, State, J,
1146 procedure Create_Simple
1147 (Start_Index : Integer;
1148 End_Index : Integer;
1149 Start_State : out State_Index;
1150 End_State : out State_Index)
1152 J : Integer := Start_Index;
1153 Last_Start : State_Index := 0;
1159 while J <= End_Index loop
1162 when Open_Bracket =>
1163 Current_State := Current_State + 1;
1166 Next_State : State_Index := Current_State + 1;
1175 for Column in 0 .. Alphabet_Size loop
1176 Set (Table, Current_State, Column,
1177 Value => Current_State + 1);
1181 -- Automatically add the first character
1183 if S (J) = '-' or else S (J) = ']' then
1184 Set (Table, Current_State, Map (S (J)),
1185 Value => Current_State);
1189 -- Loop till closing bracket found
1192 exit when S (J) = Close_Bracket;
1195 and then S (J + 1) /= ']'
1198 Start : constant Integer := J - 1;
1206 for Char in S (Start) .. S (J) loop
1207 Set (Table, Current_State, Map (Char),
1208 Value => Next_State);
1217 Set (Table, Current_State, Map (S (J)),
1218 Value => Next_State);
1224 Last_Start := Current_State;
1225 Current_State := Current_State + 1;
1227 if End_State /= 0 then
1228 Add_Empty_Char (End_State, Last_Start);
1231 End_State := Current_State;
1236 Start_Regexp_Sub : State_Index;
1237 End_Regexp_Sub : State_Index;
1238 Create_Start : State_Index := 0;
1240 Create_End : State_Index := 0;
1241 -- Initialized to avoid junk warning
1244 while S (J) /= '}' loop
1246 -- First step : find sub pattern
1249 while S (End_Sub) /= ','
1250 and then S (End_Sub) /= '}'
1252 End_Sub := End_Sub + 1;
1255 -- Second step : create a sub pattern
1265 -- Third step : create an alternative
1267 if Create_Start = 0 then
1268 Current_State := Current_State + 1;
1269 Create_Start := Current_State;
1270 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1271 Current_State := Current_State + 1;
1272 Create_End := Current_State;
1273 Add_Empty_Char (End_Regexp_Sub, Create_End);
1276 Current_State := Current_State + 1;
1277 Add_Empty_Char (Current_State, Create_Start);
1278 Create_Start := Current_State;
1279 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1280 Add_Empty_Char (End_Regexp_Sub, Create_End);
1284 if End_State /= 0 then
1285 Add_Empty_Char (End_State, Create_Start);
1288 End_State := Create_End;
1289 Last_Start := Create_Start;
1293 Current_State := Current_State + 1;
1295 if End_State /= 0 then
1296 Add_Empty_Char (End_State, Current_State);
1299 Add_Empty_Char (Current_State, Current_State + 1);
1300 Add_Empty_Char (Current_State, Current_State + 3);
1301 Last_Start := Current_State;
1303 Current_State := Current_State + 1;
1305 for K in 0 .. Alphabet_Size loop
1306 Set (Table, Current_State, K,
1307 Value => Current_State + 1);
1310 Current_State := Current_State + 1;
1311 Add_Empty_Char (Current_State, Current_State + 1);
1313 Current_State := Current_State + 1;
1314 Add_Empty_Char (Current_State, Last_Start);
1315 End_State := Current_State;
1318 Current_State := Current_State + 1;
1321 for K in 0 .. Alphabet_Size loop
1322 Set (Table, Current_State, K,
1323 Value => Current_State + 1);
1331 -- Create the state for the symbol S (J)
1333 Set (Table, Current_State, Map (S (J)),
1334 Value => Current_State + 1);
1337 Last_Start := Current_State;
1338 Current_State := Current_State + 1;
1340 if End_State /= 0 then
1341 Add_Empty_Char (End_State, Last_Start);
1344 End_State := Current_State;
1348 if Start_State = 0 then
1349 Start_State := Last_Start;
1356 -- Start of processing for Create_Primary_Table_Glob
1359 Table.all := (others => (others => 0));
1360 Create_Simple (S'First, S'Last, Start_State, End_State);
1361 Num_States := Current_State;
1362 end Create_Primary_Table_Glob;
1364 ----------------------------
1365 -- Create_Secondary_Table --
1366 ----------------------------
1368 function Create_Secondary_Table
1369 (First_Table : Regexp_Array_Access;
1370 Num_States : State_Index;
1371 Start_State : State_Index;
1372 End_State : State_Index) return Regexp
1374 pragma Warnings (Off, Num_States);
1376 Last_Index : constant State_Index := First_Table'Last (1);
1377 type Meta_State is array (1 .. Last_Index) of Boolean;
1379 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1380 (others => (others => 0));
1382 Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1383 (others => (others => False));
1385 Temp_State_Not_Null : Boolean;
1387 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1389 Current_State : State_Index := 1;
1390 Nb_State : State_Index := 1;
1393 (State : in out Meta_State;
1394 Item : State_Index);
1395 -- Compute the closure of the state (that is every other state which
1396 -- has a empty-character transition) and add it to the state
1403 (State : in out Meta_State;
1407 if State (Item) then
1411 State (Item) := True;
1413 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1414 if First_Table (Item, Column) = 0 then
1418 Closure (State, First_Table (Item, Column));
1422 -- Start of processing for Create_Secondary_Table
1425 -- Create a new state
1427 Closure (Meta_States (Current_State), Start_State);
1429 while Current_State <= Nb_State loop
1431 -- If this new meta-state includes the primary table end state,
1432 -- then this meta-state will be a final state in the regexp
1434 if Meta_States (Current_State)(End_State) then
1435 Is_Final (Current_State) := True;
1438 -- For every character in the regexp, calculate the possible
1439 -- transitions from Current_State
1441 for Column in 0 .. Alphabet_Size loop
1442 Meta_States (Nb_State + 1) := (others => False);
1443 Temp_State_Not_Null := False;
1445 for K in Meta_States (Current_State)'Range loop
1446 if Meta_States (Current_State)(K)
1447 and then First_Table (K, Column) /= 0
1450 (Meta_States (Nb_State + 1), First_Table (K, Column));
1451 Temp_State_Not_Null := True;
1455 -- If at least one transition existed
1457 if Temp_State_Not_Null then
1459 -- Check if this new state corresponds to an old one
1461 for K in 1 .. Nb_State loop
1462 if Meta_States (K) = Meta_States (Nb_State + 1) then
1463 Table (Current_State, Column) := K;
1468 -- If not, create a new state
1470 if Table (Current_State, Column) = 0 then
1471 Nb_State := Nb_State + 1;
1472 Table (Current_State, Column) := Nb_State;
1477 Current_State := Current_State + 1;
1480 -- Returns the regexp
1486 R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1487 Num_States => Nb_State);
1489 R.Is_Final := Is_Final (1 .. Nb_State);
1490 R.Case_Sensitive := Case_Sensitive;
1492 for State in 1 .. Nb_State loop
1493 for K in 0 .. Alphabet_Size loop
1494 R.States (State, K) := Table (State, K);
1498 return (Ada.Finalization.Controlled with R => R);
1500 end Create_Secondary_Table;
1502 ---------------------
1503 -- Raise_Exception --
1504 ---------------------
1506 procedure Raise_Exception (M : String; Index : Integer) is
1508 raise Error_In_Regexp with M & " at offset" & Index'Img;
1509 end Raise_Exception;
1511 -- Start of processing for Compile
1514 -- Special case for the empty string: it always matches, and the
1515 -- following processing would fail on it.
1517 return (Ada.Finalization.Controlled with
1518 R => new Regexp_Value'
1519 (Alphabet_Size => 0,
1521 Map => (others => 0),
1522 States => (others => (others => 1)),
1523 Is_Final => (others => True),
1524 Case_Sensitive => True));
1527 if not Case_Sensitive then
1528 System.Case_Util.To_Lower (S);
1531 -- Check the pattern is well-formed before any treatment
1533 Check_Well_Formed_Pattern;
1537 -- Creates the primary table
1540 Table : Regexp_Array_Access;
1541 Num_States : State_Index;
1542 Start_State : State_Index;
1543 End_State : State_Index;
1547 Table := new Regexp_Array (1 .. 100,
1548 0 .. Alphabet_Size + 10);
1550 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1552 Create_Primary_Table_Glob
1553 (Table, Num_States, Start_State, End_State);
1556 -- Creates the secondary table
1558 R := Create_Secondary_Table
1559 (Table, Num_States, Start_State, End_State);
1569 procedure Finalize (R : in out Regexp) is
1570 procedure Free is new
1571 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1582 (Table : Regexp_Array_Access;
1583 State : State_Index;
1584 Column : Column_Index) return State_Index
1587 if State <= Table'Last (1)
1588 and then Column <= Table'Last (2)
1590 return Table (State, Column);
1600 function Match (S : String; R : Regexp) return Boolean is
1601 Current_State : State_Index := 1;
1605 raise Constraint_Error;
1608 for Char in S'Range loop
1610 if R.R.Case_Sensitive then
1611 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1614 R.R.States (Current_State,
1615 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1618 if Current_State = 0 then
1624 return R.R.Is_Final (Current_State);
1632 (Table : in out Regexp_Array_Access;
1633 State : State_Index;
1634 Column : Column_Index;
1635 Value : State_Index)
1637 New_Lines : State_Index;
1638 New_Columns : Column_Index;
1639 New_Table : Regexp_Array_Access;
1642 if State <= Table'Last (1)
1643 and then Column <= Table'Last (2)
1645 Table (State, Column) := Value;
1647 -- Doubles the size of the table until it is big enough that
1648 -- (State, Column) is a valid index
1650 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1651 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1652 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1653 Table'First (2) .. New_Columns);
1654 New_Table.all := (others => (others => 0));
1656 for J in Table'Range (1) loop
1657 for K in Table'Range (2) loop
1658 New_Table (J, K) := Table (J, K);
1664 Table (State, Column) := Value;