1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . R E G E X P --
9 -- Copyright (C) 1999-2009, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Ada.Unchecked_Deallocation;
36 with System.Case_Util;
38 package body System.Regexp is
40 Open_Paren : constant Character := '(';
41 Close_Paren : constant Character := ')';
42 Open_Bracket : constant Character := '[';
43 Close_Bracket : constant Character := ']';
45 type State_Index is new Natural;
46 type Column_Index is new Natural;
48 type Regexp_Array is array
49 (State_Index range <>, Column_Index range <>) of State_Index;
50 -- First index is for the state number
51 -- Second index is for the character type
52 -- Contents is the new State
54 type Regexp_Array_Access is access Regexp_Array;
55 -- Use this type through the functions Set below, so that it
56 -- can grow dynamically depending on the needs.
58 type Mapping is array (Character'Range) of Column_Index;
59 -- Mapping between characters and column in the Regexp_Array
61 type Boolean_Array is array (State_Index range <>) of Boolean;
64 (Alphabet_Size : Column_Index;
65 Num_States : State_Index) is
68 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
69 Is_Final : Boolean_Array (1 .. Num_States);
70 Case_Sensitive : Boolean;
72 -- Deterministic finite-state machine
74 -----------------------
75 -- Local Subprograms --
76 -----------------------
79 (Table : in out Regexp_Array_Access;
81 Column : Column_Index;
83 -- Sets a value in the table. If the table is too small, reallocate it
84 -- dynamically so that (State, Column) is a valid index in it.
87 (Table : Regexp_Array_Access;
89 Column : Column_Index)
91 -- Returns the value in the table at (State, Column).
92 -- If this index does not exist in the table, returns 0
94 procedure Free is new Ada.Unchecked_Deallocation
95 (Regexp_Array, Regexp_Array_Access);
101 procedure Adjust (R : in out Regexp) is
105 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
106 Num_States => R.R.Num_States);
117 Glob : Boolean := False;
118 Case_Sensitive : Boolean := True)
121 S : String := Pattern;
122 -- The pattern which is really compiled (when the pattern is case
123 -- insensitive, we convert this string to lower-cases
125 Map : Mapping := (others => 0);
126 -- Mapping between characters and columns in the tables
128 Alphabet_Size : Column_Index := 0;
129 -- Number of significant characters in the regular expression.
130 -- This total does not include special operators, such as *, (, ...
132 procedure Check_Well_Formed_Pattern;
133 -- Check that the pattern to compile is well-formed, so that
134 -- subsequent code can rely on this without performing each time
135 -- the checks to avoid accessing the pattern outside its bounds.
136 -- Except that, not all well-formedness rules are checked.
137 -- In particular, the rules about special characters not being
138 -- treated as regular characters are not checked.
140 procedure Create_Mapping;
141 -- Creates a mapping between characters in the regexp and columns
142 -- in the tables representing the regexp. Test that the regexp is
143 -- well-formed Modifies Alphabet_Size and Map
145 procedure Create_Primary_Table
146 (Table : out Regexp_Array_Access;
147 Num_States : out State_Index;
148 Start_State : out State_Index;
149 End_State : out State_Index);
150 -- Creates the first version of the regexp (this is a non deterministic
151 -- finite state machine, which is unadapted for a fast pattern
152 -- matching algorithm). We use a recursive algorithm to process the
153 -- parenthesis sub-expressions.
155 -- Table : at the end of the procedure : Column 0 is for any character
156 -- ('.') and the last columns are for no character (closure)
157 -- Num_States is set to the number of states in the table
158 -- Start_State is the number of the starting state in the regexp
159 -- End_State is the number of the final state when the regexp matches
161 procedure Create_Primary_Table_Glob
162 (Table : out Regexp_Array_Access;
163 Num_States : out State_Index;
164 Start_State : out State_Index;
165 End_State : out State_Index);
166 -- Same function as above, but it deals with the second possible
167 -- grammar for 'globbing pattern', which is a kind of subset of the
168 -- whole regular expression grammar.
170 function Create_Secondary_Table
171 (First_Table : Regexp_Array_Access;
172 Num_States : State_Index;
173 Start_State : State_Index;
174 End_State : State_Index)
176 -- Creates the definitive table representing the regular expression
177 -- This is actually a transformation of the primary table First_Table,
178 -- where every state is grouped with the states in its 'no-character'
179 -- columns. The transitions between the new states are then recalculated
180 -- and if necessary some new states are created.
182 -- Note that the resulting finite-state machine is not optimized in
183 -- terms of the number of states : it would be more time-consuming to
184 -- add a third pass to reduce the number of states in the machine, with
185 -- no speed improvement...
187 procedure Raise_Exception (M : String; Index : Integer);
188 pragma No_Return (Raise_Exception);
189 -- Raise an exception, indicating an error at character Index in S
191 -------------------------------
192 -- Check_Well_Formed_Pattern --
193 -------------------------------
195 procedure Check_Well_Formed_Pattern is
197 J : Integer := S'First;
198 Past_Elmt : Boolean := False;
199 -- Set to True everywhere an elmt has been parsed, if Glob=False,
200 -- meaning there can be now an occurence of '*', '+' and '?'.
201 Past_Term : Boolean := False;
202 -- Set to True everywhere a term has been parsed, if Glob=False,
203 -- meaning there can be now an occurence of '|'.
204 Parenthesis_Level : Integer := 0;
205 Curly_Level : Integer := 0;
206 Last_Open : Integer := S'First - 1;
207 -- The last occurence of an opening parenthesis, if Glob=False,
208 -- or the last occurence of an opening curly brace, if Glob=True.
210 procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
212 --------------------------------------
213 -- Raise_Exception_If_No_More_Chars --
214 --------------------------------------
216 procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
218 if J + K > S'Last then
220 ("Ill-formed pattern while parsing", J);
222 end Raise_Exception_If_No_More_Chars;
224 -- Start of processing for Check_Well_Formed_Pattern
227 while J <= S'Last loop
231 Raise_Exception_If_No_More_Chars;
236 Raise_Exception_If_No_More_Chars;
240 -- The first character never has a special meaning
242 if S (J) = ']' or else S (J) = '-' then
244 Raise_Exception_If_No_More_Chars;
247 -- The set of characters cannot be empty
251 ("Set of characters cannot be empty in regular "
256 Possible_Range_Start : Boolean := True;
257 -- Set to True everywhere a range character '-'
261 exit when S (J) = Close_Bracket;
263 -- The current character should be followed by
264 -- a closing bracket.
266 Raise_Exception_If_No_More_Chars (1);
269 and then S (J + 1) /= Close_Bracket
271 if not Possible_Range_Start then
273 ("No mix of ranges is allowed in "
274 & "regular expression", J);
278 Raise_Exception_If_No_More_Chars;
280 -- Range cannot be followed by '-' character,
281 -- except as last character in the set.
283 Possible_Range_Start := False;
285 Possible_Range_Start := True;
290 Raise_Exception_If_No_More_Chars;
297 -- A closing bracket can end an elmt or term
302 when Close_Bracket =>
303 -- A close bracket must follow a open_bracket,
304 -- and cannot be found alone on the line.
307 ("Incorrect character ']' in regular expression", J);
313 -- Any character can be an elmt or a term
318 -- \ not allowed at the end of the regexp
321 ("Incorrect character '\' in regular expression", J);
326 Parenthesis_Level := Parenthesis_Level + 1;
329 -- An open parenthesis does not end an elmt or term
337 Parenthesis_Level := Parenthesis_Level - 1;
339 if Parenthesis_Level < 0 then
341 ("')' is not associated with '(' in regular "
345 if J = Last_Open + 1 then
347 ("Empty parentheses not allowed in regular "
351 if not Past_Term then
353 ("Closing parenthesis not allowed here in regular "
357 -- A closing parenthesis can end an elmt or term
365 Curly_Level := Curly_Level + 1;
368 -- Any character can be an elmt or a term
374 -- No need to check for ',' as the code always accepts them
378 Curly_Level := Curly_Level - 1;
380 if Curly_Level < 0 then
382 ("'}' is not associated with '{' in regular "
386 if J = Last_Open + 1 then
388 ("Empty curly braces not allowed in regular "
392 -- Any character can be an elmt or a term
398 when '*' | '?' | '+' =>
400 -- These operators must apply to an elmt sub-expression,
401 -- and cannot be found if one has not just been parsed.
403 if not Past_Elmt then
405 ("'*', '+' and '?' operators must be "
406 & "applied to an element in regular expression", J);
415 -- This operator must apply to a term sub-expression,
416 -- and cannot be found if one has not just been parsed.
418 if not Past_Term then
420 ("'|' operator must be "
421 & "applied to a term in regular expression", J);
430 -- Any character can be an elmt or a term
440 -- A closing parenthesis must follow an open parenthesis
442 if Parenthesis_Level /= 0 then
444 ("'(' must always be associated with a ')'", J);
447 -- A closing curly brace must follow an open curly brace
449 if Curly_Level /= 0 then
451 ("'{' must always be associated with a '}'", J);
453 end Check_Well_Formed_Pattern;
459 procedure Create_Mapping is
461 procedure Add_In_Map (C : Character);
462 -- Add a character in the mapping, if it is not already defined
468 procedure Add_In_Map (C : Character) is
471 Alphabet_Size := Alphabet_Size + 1;
472 Map (C) := Alphabet_Size;
476 J : Integer := S'First;
477 Parenthesis_Level : Integer := 0;
478 Curly_Level : Integer := 0;
479 Last_Open : Integer := S'First - 1;
481 -- Start of processing for Create_Mapping
484 while J <= S'Last loop
493 if S (J) = ']' or else S (J) = '-' then
497 -- The first character never has a special meaning
502 ("Ran out of characters while parsing ", J);
505 exit when S (J) = Close_Bracket;
508 and then S (J + 1) /= Close_Bracket
511 Start : constant Integer := J - 1;
520 for Char in S (Start) .. S (J) loop
535 -- A close bracket must follow a open_bracket,
536 -- and cannot be found alone on the line
538 when Close_Bracket =>
540 ("Incorrect character ']' in regular expression", J);
548 -- \ not allowed at the end of the regexp
551 ("Incorrect character '\' in regular expression", J);
556 Parenthesis_Level := Parenthesis_Level + 1;
559 Add_In_Map (Open_Paren);
564 Parenthesis_Level := Parenthesis_Level - 1;
566 if Parenthesis_Level < 0 then
568 ("')' is not associated with '(' in regular "
572 if J = Last_Open + 1 then
574 ("Empty parenthesis not allowed in regular "
579 Add_In_Map (Close_Paren);
591 Curly_Level := Curly_Level + 1;
598 Curly_Level := Curly_Level - 1;
605 ("'*', '+', '?' and '|' operators cannot be in "
606 & "first position in regular expression", J);
614 -- These operators must apply to a sub-expression,
615 -- and cannot be found at the beginning of the line
618 ("'*', '+', '?' and '|' operators cannot be in "
619 & "first position in regular expression", J);
633 -- A closing parenthesis must follow an open parenthesis
635 if Parenthesis_Level /= 0 then
637 ("'(' must always be associated with a ')'", J);
640 if Curly_Level /= 0 then
642 ("'{' must always be associated with a '}'", J);
646 --------------------------
647 -- Create_Primary_Table --
648 --------------------------
650 procedure Create_Primary_Table
651 (Table : out Regexp_Array_Access;
652 Num_States : out State_Index;
653 Start_State : out State_Index;
654 End_State : out State_Index)
656 Empty_Char : constant Column_Index := Alphabet_Size + 1;
658 Current_State : State_Index := 0;
659 -- Index of the last created state
661 procedure Add_Empty_Char
662 (State : State_Index;
663 To_State : State_Index);
664 -- Add a empty-character transition from State to To_State
666 procedure Create_Repetition
667 (Repetition : Character;
668 Start_Prev : State_Index;
669 End_Prev : State_Index;
670 New_Start : out State_Index;
671 New_End : in out State_Index);
672 -- Create the table in case we have a '*', '+' or '?'.
673 -- Start_Prev .. End_Prev should indicate respectively the start and
674 -- end index of the previous expression, to which '*', '+' or '?' is
677 procedure Create_Simple
678 (Start_Index : Integer;
680 Start_State : out State_Index;
681 End_State : out State_Index);
682 -- Fill the table for the regexp Simple.
683 -- This is the recursive procedure called to handle () expressions
684 -- If End_State = 0, then the call to Create_Simple creates an
685 -- independent regexp, not a concatenation
686 -- Start_Index .. End_Index is the starting index in the string S.
688 -- Warning: it may look like we are creating too many empty-string
689 -- transitions, but they are needed to get the correct regexp.
690 -- The table is filled as follow ( s means start-state, e means
693 -- regexp state_num | a b * empty_string
694 -- ------- ------------------------------
698 -- ab 1 (s) | 2 - - -
715 -- (a) 1 (s) | 2 - - -
731 function Next_Sub_Expression
732 (Start_Index : Integer;
735 -- Returns the index of the last character of the next sub-expression
736 -- in Simple. Index cannot be greater than End_Index.
742 procedure Add_Empty_Char
743 (State : State_Index;
744 To_State : State_Index)
746 J : Column_Index := Empty_Char;
749 while Get (Table, State, J) /= 0 loop
753 Set (Table, State, J, To_State);
756 -----------------------
757 -- Create_Repetition --
758 -----------------------
760 procedure Create_Repetition
761 (Repetition : Character;
762 Start_Prev : State_Index;
763 End_Prev : State_Index;
764 New_Start : out State_Index;
765 New_End : in out State_Index)
768 New_Start := Current_State + 1;
771 Add_Empty_Char (New_End, New_Start);
774 Current_State := Current_State + 2;
775 New_End := Current_State;
777 Add_Empty_Char (End_Prev, New_End);
778 Add_Empty_Char (New_Start, Start_Prev);
780 if Repetition /= '+' then
781 Add_Empty_Char (New_Start, New_End);
784 if Repetition /= '?' then
785 Add_Empty_Char (New_End, New_Start);
787 end Create_Repetition;
793 procedure Create_Simple
794 (Start_Index : Integer;
796 Start_State : out State_Index;
797 End_State : out State_Index)
799 J : Integer := Start_Index;
800 Last_Start : State_Index := 0;
805 while J <= End_Index loop
809 J_Start : constant Integer := J + 1;
810 Next_Start : State_Index;
811 Next_End : State_Index;
814 J := Next_Sub_Expression (J, End_Index);
815 Create_Simple (J_Start, J - 1, Next_Start, Next_End);
818 and then (S (J + 1) = '*' or else
819 S (J + 1) = '+' or else
831 Last_Start := Next_Start;
833 if End_State /= 0 then
834 Add_Empty_Char (End_State, Last_Start);
837 End_State := Next_End;
843 Start_Prev : constant State_Index := Start_State;
844 End_Prev : constant State_Index := End_State;
845 Start_J : constant Integer := J + 1;
846 Start_Next : State_Index := 0;
847 End_Next : State_Index := 0;
850 J := Next_Sub_Expression (J, End_Index);
852 -- Create a new state for the start of the alternative
854 Current_State := Current_State + 1;
855 Last_Start := Current_State;
856 Start_State := Last_Start;
858 -- Create the tree for the second part of alternative
860 Create_Simple (Start_J, J, Start_Next, End_Next);
862 -- Create the end state
864 Add_Empty_Char (Last_Start, Start_Next);
865 Add_Empty_Char (Last_Start, Start_Prev);
866 Current_State := Current_State + 1;
867 End_State := Current_State;
868 Add_Empty_Char (End_Prev, End_State);
869 Add_Empty_Char (End_Next, End_State);
873 Current_State := Current_State + 1;
876 Next_State : State_Index := Current_State + 1;
886 for Column in 0 .. Alphabet_Size loop
887 Set (Table, Current_State, Column,
888 Value => Current_State + 1);
892 -- Automatically add the first character
894 if S (J) = '-' or else S (J) = ']' then
895 Set (Table, Current_State, Map (S (J)),
896 Value => Next_State);
900 -- Loop till closing bracket found
903 exit when S (J) = Close_Bracket;
906 and then S (J + 1) /= ']'
909 Start : constant Integer := J - 1;
918 for Char in S (Start) .. S (J) loop
919 Set (Table, Current_State, Map (Char),
920 Value => Next_State);
929 Set (Table, Current_State, Map (S (J)),
930 Value => Next_State);
936 Current_State := Current_State + 1;
938 -- If the next symbol is a special symbol
941 and then (S (J + 1) = '*' or else
942 S (J + 1) = '+' or else
954 Last_Start := Current_State - 1;
956 if End_State /= 0 then
957 Add_Empty_Char (End_State, Last_Start);
960 End_State := Current_State;
963 when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
965 ("Incorrect character in regular expression :", J);
968 Current_State := Current_State + 1;
970 -- Create the state for the symbol S (J)
973 for K in 0 .. Alphabet_Size loop
974 Set (Table, Current_State, K,
975 Value => Current_State + 1);
983 Set (Table, Current_State, Map (S (J)),
984 Value => Current_State + 1);
987 Current_State := Current_State + 1;
989 -- If the next symbol is a special symbol
992 and then (S (J + 1) = '*' or else
993 S (J + 1) = '+' or else
1005 Last_Start := Current_State - 1;
1007 if End_State /= 0 then
1008 Add_Empty_Char (End_State, Last_Start);
1011 End_State := Current_State;
1016 if Start_State = 0 then
1017 Start_State := Last_Start;
1024 -------------------------
1025 -- Next_Sub_Expression --
1026 -------------------------
1028 function Next_Sub_Expression
1029 (Start_Index : Integer;
1030 End_Index : Integer)
1033 J : Integer := Start_Index;
1034 Start_On_Alter : Boolean := False;
1038 Start_On_Alter := True;
1042 exit when J = End_Index;
1049 when Open_Bracket =>
1052 exit when S (J) = Close_Bracket;
1060 J := Next_Sub_Expression (J, End_Index);
1066 if Start_On_Alter then
1076 end Next_Sub_Expression;
1078 -- Start of Create_Primary_Table
1081 Table.all := (others => (others => 0));
1082 Create_Simple (S'First, S'Last, Start_State, End_State);
1083 Num_States := Current_State;
1084 end Create_Primary_Table;
1086 -------------------------------
1087 -- Create_Primary_Table_Glob --
1088 -------------------------------
1090 procedure Create_Primary_Table_Glob
1091 (Table : out Regexp_Array_Access;
1092 Num_States : out State_Index;
1093 Start_State : out State_Index;
1094 End_State : out State_Index)
1096 Empty_Char : constant Column_Index := Alphabet_Size + 1;
1098 Current_State : State_Index := 0;
1099 -- Index of the last created state
1101 procedure Add_Empty_Char
1102 (State : State_Index;
1103 To_State : State_Index);
1104 -- Add a empty-character transition from State to To_State
1106 procedure Create_Simple
1107 (Start_Index : Integer;
1108 End_Index : Integer;
1109 Start_State : out State_Index;
1110 End_State : out State_Index);
1111 -- Fill the table for the S (Start_Index .. End_Index).
1112 -- This is the recursive procedure called to handle () expressions
1114 --------------------
1115 -- Add_Empty_Char --
1116 --------------------
1118 procedure Add_Empty_Char
1119 (State : State_Index;
1120 To_State : State_Index)
1122 J : Column_Index := Empty_Char;
1125 while Get (Table, State, J) /= 0 loop
1129 Set (Table, State, J,
1137 procedure Create_Simple
1138 (Start_Index : Integer;
1139 End_Index : Integer;
1140 Start_State : out State_Index;
1141 End_State : out State_Index)
1143 J : Integer := Start_Index;
1144 Last_Start : State_Index := 0;
1150 while J <= End_Index loop
1153 when Open_Bracket =>
1154 Current_State := Current_State + 1;
1157 Next_State : State_Index := Current_State + 1;
1166 for Column in 0 .. Alphabet_Size loop
1167 Set (Table, Current_State, Column,
1168 Value => Current_State + 1);
1172 -- Automatically add the first character
1174 if S (J) = '-' or else S (J) = ']' then
1175 Set (Table, Current_State, Map (S (J)),
1176 Value => Current_State);
1180 -- Loop till closing bracket found
1183 exit when S (J) = Close_Bracket;
1186 and then S (J + 1) /= ']'
1189 Start : constant Integer := J - 1;
1197 for Char in S (Start) .. S (J) loop
1198 Set (Table, Current_State, Map (Char),
1199 Value => Next_State);
1208 Set (Table, Current_State, Map (S (J)),
1209 Value => Next_State);
1215 Last_Start := Current_State;
1216 Current_State := Current_State + 1;
1218 if End_State /= 0 then
1219 Add_Empty_Char (End_State, Last_Start);
1222 End_State := Current_State;
1227 Start_Regexp_Sub : State_Index;
1228 End_Regexp_Sub : State_Index;
1229 Create_Start : State_Index := 0;
1231 Create_End : State_Index := 0;
1232 -- Initialized to avoid junk warning
1235 while S (J) /= '}' loop
1237 -- First step : find sub pattern
1240 while S (End_Sub) /= ','
1241 and then S (End_Sub) /= '}'
1243 End_Sub := End_Sub + 1;
1246 -- Second step : create a sub pattern
1256 -- Third step : create an alternative
1258 if Create_Start = 0 then
1259 Current_State := Current_State + 1;
1260 Create_Start := Current_State;
1261 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1262 Current_State := Current_State + 1;
1263 Create_End := Current_State;
1264 Add_Empty_Char (End_Regexp_Sub, Create_End);
1267 Current_State := Current_State + 1;
1268 Add_Empty_Char (Current_State, Create_Start);
1269 Create_Start := Current_State;
1270 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1271 Add_Empty_Char (End_Regexp_Sub, Create_End);
1275 if End_State /= 0 then
1276 Add_Empty_Char (End_State, Create_Start);
1279 End_State := Create_End;
1280 Last_Start := Create_Start;
1284 Current_State := Current_State + 1;
1286 if End_State /= 0 then
1287 Add_Empty_Char (End_State, Current_State);
1290 Add_Empty_Char (Current_State, Current_State + 1);
1291 Add_Empty_Char (Current_State, Current_State + 3);
1292 Last_Start := Current_State;
1294 Current_State := Current_State + 1;
1296 for K in 0 .. Alphabet_Size loop
1297 Set (Table, Current_State, K,
1298 Value => Current_State + 1);
1301 Current_State := Current_State + 1;
1302 Add_Empty_Char (Current_State, Current_State + 1);
1304 Current_State := Current_State + 1;
1305 Add_Empty_Char (Current_State, Last_Start);
1306 End_State := Current_State;
1309 Current_State := Current_State + 1;
1312 for K in 0 .. Alphabet_Size loop
1313 Set (Table, Current_State, K,
1314 Value => Current_State + 1);
1322 -- Create the state for the symbol S (J)
1324 Set (Table, Current_State, Map (S (J)),
1325 Value => Current_State + 1);
1328 Last_Start := Current_State;
1329 Current_State := Current_State + 1;
1331 if End_State /= 0 then
1332 Add_Empty_Char (End_State, Last_Start);
1335 End_State := Current_State;
1339 if Start_State = 0 then
1340 Start_State := Last_Start;
1347 -- Start of processing for Create_Primary_Table_Glob
1350 Table.all := (others => (others => 0));
1351 Create_Simple (S'First, S'Last, Start_State, End_State);
1352 Num_States := Current_State;
1353 end Create_Primary_Table_Glob;
1355 ----------------------------
1356 -- Create_Secondary_Table --
1357 ----------------------------
1359 function Create_Secondary_Table
1360 (First_Table : Regexp_Array_Access;
1361 Num_States : State_Index;
1362 Start_State : State_Index;
1363 End_State : State_Index) return Regexp
1365 pragma Warnings (Off, Num_States);
1367 Last_Index : constant State_Index := First_Table'Last (1);
1368 type Meta_State is array (1 .. Last_Index) of Boolean;
1370 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1371 (others => (others => 0));
1373 Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1374 (others => (others => False));
1376 Temp_State_Not_Null : Boolean;
1378 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1380 Current_State : State_Index := 1;
1381 Nb_State : State_Index := 1;
1384 (State : in out Meta_State;
1385 Item : State_Index);
1386 -- Compute the closure of the state (that is every other state which
1387 -- has a empty-character transition) and add it to the state
1394 (State : in out Meta_State;
1398 if State (Item) then
1402 State (Item) := True;
1404 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1405 if First_Table (Item, Column) = 0 then
1409 Closure (State, First_Table (Item, Column));
1413 -- Start of processing for Create_Secondary_Table
1416 -- Create a new state
1418 Closure (Meta_States (Current_State), Start_State);
1420 while Current_State <= Nb_State loop
1422 -- If this new meta-state includes the primary table end state,
1423 -- then this meta-state will be a final state in the regexp
1425 if Meta_States (Current_State)(End_State) then
1426 Is_Final (Current_State) := True;
1429 -- For every character in the regexp, calculate the possible
1430 -- transitions from Current_State
1432 for Column in 0 .. Alphabet_Size loop
1433 Meta_States (Nb_State + 1) := (others => False);
1434 Temp_State_Not_Null := False;
1436 for K in Meta_States (Current_State)'Range loop
1437 if Meta_States (Current_State)(K)
1438 and then First_Table (K, Column) /= 0
1441 (Meta_States (Nb_State + 1), First_Table (K, Column));
1442 Temp_State_Not_Null := True;
1446 -- If at least one transition existed
1448 if Temp_State_Not_Null then
1450 -- Check if this new state corresponds to an old one
1452 for K in 1 .. Nb_State loop
1453 if Meta_States (K) = Meta_States (Nb_State + 1) then
1454 Table (Current_State, Column) := K;
1459 -- If not, create a new state
1461 if Table (Current_State, Column) = 0 then
1462 Nb_State := Nb_State + 1;
1463 Table (Current_State, Column) := Nb_State;
1468 Current_State := Current_State + 1;
1471 -- Returns the regexp
1477 R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1478 Num_States => Nb_State);
1480 R.Is_Final := Is_Final (1 .. Nb_State);
1481 R.Case_Sensitive := Case_Sensitive;
1483 for State in 1 .. Nb_State loop
1484 for K in 0 .. Alphabet_Size loop
1485 R.States (State, K) := Table (State, K);
1489 return (Ada.Finalization.Controlled with R => R);
1491 end Create_Secondary_Table;
1493 ---------------------
1494 -- Raise_Exception --
1495 ---------------------
1497 procedure Raise_Exception (M : String; Index : Integer) is
1499 raise Error_In_Regexp with M & " at offset" & Index'Img;
1500 end Raise_Exception;
1502 -- Start of processing for Compile
1505 -- Special case for the empty string: it always matches, and the
1506 -- following processing would fail on it.
1508 return (Ada.Finalization.Controlled with
1509 R => new Regexp_Value'
1510 (Alphabet_Size => 0,
1512 Map => (others => 0),
1513 States => (others => (others => 1)),
1514 Is_Final => (others => True),
1515 Case_Sensitive => True));
1518 if not Case_Sensitive then
1519 System.Case_Util.To_Lower (S);
1522 -- Check the pattern is well-formed before any treatment
1524 Check_Well_Formed_Pattern;
1528 -- Creates the primary table
1531 Table : Regexp_Array_Access;
1532 Num_States : State_Index;
1533 Start_State : State_Index;
1534 End_State : State_Index;
1538 Table := new Regexp_Array (1 .. 100,
1539 0 .. Alphabet_Size + 10);
1541 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1543 Create_Primary_Table_Glob
1544 (Table, Num_States, Start_State, End_State);
1547 -- Creates the secondary table
1549 R := Create_Secondary_Table
1550 (Table, Num_States, Start_State, End_State);
1560 procedure Finalize (R : in out Regexp) is
1561 procedure Free is new
1562 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1573 (Table : Regexp_Array_Access;
1574 State : State_Index;
1575 Column : Column_Index) return State_Index
1578 if State <= Table'Last (1)
1579 and then Column <= Table'Last (2)
1581 return Table (State, Column);
1591 function Match (S : String; R : Regexp) return Boolean is
1592 Current_State : State_Index := 1;
1596 raise Constraint_Error;
1599 for Char in S'Range loop
1601 if R.R.Case_Sensitive then
1602 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1605 R.R.States (Current_State,
1606 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1609 if Current_State = 0 then
1615 return R.R.Is_Final (Current_State);
1623 (Table : in out Regexp_Array_Access;
1624 State : State_Index;
1625 Column : Column_Index;
1626 Value : State_Index)
1628 New_Lines : State_Index;
1629 New_Columns : Column_Index;
1630 New_Table : Regexp_Array_Access;
1633 if State <= Table'Last (1)
1634 and then Column <= Table'Last (2)
1636 Table (State, Column) := Value;
1638 -- Doubles the size of the table until it is big enough that
1639 -- (State, Column) is a valid index
1641 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1642 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1643 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1644 Table'First (2) .. New_Columns);
1645 New_Table.all := (others => (others => 0));
1647 for J in Table'Range (1) loop
1648 for K in Table'Range (2) loop
1649 New_Table (J, K) := Table (J, K);
1655 Table (State, Column) := Value;