1 ------------------------------------------------------------------------------
3 -- GNAT LIBRARY COMPONENTS --
5 -- G N A T . S P I T B O L . P A T T E R N S --
9 -- Copyright (C) 1998-2010, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 -- Note: the data structures and general approach used in this implementation
33 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
34 -- a direct translation, but the approach is followed closely. In particular,
35 -- we use the one stack approach developed in the SPITBOL implementation.
37 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
39 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
41 with System; use System;
43 with Ada.Unchecked_Conversion;
44 with Ada.Unchecked_Deallocation;
46 package body GNAT.Spitbol.Patterns is
48 ------------------------
49 -- Internal Debugging --
50 ------------------------
52 Internal_Debug : constant Boolean := False;
53 -- Set this flag to True to activate some built-in debugging traceback
54 -- These are all lines output with PutD and Put_LineD.
57 pragma Inline (New_LineD);
58 -- Output new blank line with New_Line if Internal_Debug is True
60 procedure PutD (Str : String);
62 -- Output string with Put if Internal_Debug is True
64 procedure Put_LineD (Str : String);
65 pragma Inline (Put_LineD);
66 -- Output string with Put_Line if Internal_Debug is True
68 -----------------------------
69 -- Local Type Declarations --
70 -----------------------------
72 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
73 subtype File_Ptr is Ada.Text_IO.File_Access;
75 function To_Address is new Ada.Unchecked_Conversion (PE_Ptr, Address);
76 -- Used only for debugging output purposes
78 subtype AFC is Ada.Finalization.Controlled;
80 N : constant PE_Ptr := null;
81 -- Shorthand used to initialize Copy fields to null
83 type Natural_Ptr is access all Natural;
84 type Pattern_Ptr is access all Pattern;
86 --------------------------------------------------
87 -- Description of Algorithm and Data Structures --
88 --------------------------------------------------
90 -- A pattern structure is represented as a linked graph of nodes
91 -- with the following structure:
93 -- +------------------------------------+
95 -- +------------------------------------+
97 -- +------------------------------------+
99 -- +------------------------------------+
101 -- +------------------------------------+
103 -- Pcode is a code value indicating the type of the pattern node. This
104 -- code is used both as the discriminant value for the record, and as
105 -- the case index in the main match routine that branches to the proper
106 -- match code for the given element.
108 -- Index is a serial index number. The use of these serial index
109 -- numbers is described in a separate section.
111 -- Pthen is a pointer to the successor node, i.e the node to be matched
112 -- if the attempt to match the node succeeds. If this is the last node
113 -- of the pattern to be matched, then Pthen points to a dummy node
114 -- of kind PC_EOP (end of pattern), which initializes pattern exit.
116 -- The parameter or parameters are present for certain node types,
117 -- and the type varies with the pattern code.
119 type Pattern_Code is (
212 type IndexT is range 0 .. +(2 **15 - 1);
214 type PE (Pcode : Pattern_Code) is record
217 -- Serial index number of pattern element within pattern
220 -- Successor element, to be matched after this one
240 PC_Unanchored => null;
245 PC_Arbno_X => Alt : PE_Ptr;
247 when PC_Rpat => PP : Pattern_Ptr;
249 when PC_Pred_Func => BF : Boolean_Func;
259 PC_String_VP => VP : VString_Ptr;
262 PC_Write_OnM => FP : File_Ptr;
264 when PC_String => Str : String_Ptr;
266 when PC_String_2 => Str2 : String (1 .. 2);
268 when PC_String_3 => Str3 : String (1 .. 3);
270 when PC_String_4 => Str4 : String (1 .. 4);
272 when PC_String_5 => Str5 : String (1 .. 5);
274 when PC_String_6 => Str6 : String (1 .. 6);
276 when PC_Setcur => Var : Natural_Ptr;
284 PC_Span_CH => Char : Character;
291 PC_Span_CS => CS : Character_Set;
298 PC_Tab_Nat => Nat : Natural;
304 PC_Tab_NF => NF : Natural_Func;
310 PC_Tab_NP => NP : Natural_Ptr;
318 PC_String_VF => VF : VString_Func;
323 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
324 -- Range of pattern codes that has an Alt field. This is used in the
325 -- recursive traversals, since these links must be followed.
327 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
328 -- This is the end of pattern element, and is thus the representation of
329 -- a null pattern. It has a zero index element since it is never placed
330 -- inside a pattern. Furthermore it does not need a successor, since it
331 -- marks the end of the pattern, so that no more successors are needed.
333 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
334 -- This is the end of pattern pointer, that is used in the Pthen pointer
335 -- of other nodes to signal end of pattern.
337 -- The following array is used to determine if a pattern used as an
338 -- argument for Arbno is eligible for treatment using the simple Arbno
339 -- structure (i.e. it is a pattern that is guaranteed to match at least
340 -- one character on success, and not to make any entries on the stack.
342 OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean :=
365 -------------------------------
366 -- The Pattern History Stack --
367 -------------------------------
369 -- The pattern history stack is used for controlling backtracking when
370 -- a match fails. The idea is to stack entries that give a cursor value
371 -- to be restored, and a node to be reestablished as the current node to
372 -- attempt an appropriate rematch operation. The processing for a pattern
373 -- element that has rematch alternatives pushes an appropriate entry or
374 -- entry on to the stack, and the proceeds. If a match fails at any point,
375 -- the top element of the stack is popped off, resetting the cursor and
376 -- the match continues by accessing the node stored with this entry.
378 type Stack_Entry is record
381 -- Saved cursor value that is restored when this entry is popped
382 -- from the stack if a match attempt fails. Occasionally, this
383 -- field is used to store a history stack pointer instead of a
384 -- cursor. Such cases are noted in the documentation and the value
385 -- stored is negative since stack pointer values are always negative.
388 -- This pattern element reference is reestablished as the current
389 -- Node to be matched (which will attempt an appropriate rematch).
393 subtype Stack_Range is Integer range -Stack_Size .. -1;
395 type Stack_Type is array (Stack_Range) of Stack_Entry;
396 -- The type used for a history stack. The actual instance of the stack
397 -- is declared as a local variable in the Match routine, to properly
398 -- handle recursive calls to Match. All stack pointer values are negative
399 -- to distinguish them from normal cursor values.
401 -- Note: the pattern matching stack is used only to handle backtracking.
402 -- If no backtracking occurs, its entries are never accessed, and never
403 -- popped off, and in particular it is normal for a successful match
404 -- to terminate with entries on the stack that are simply discarded.
406 -- Note: in subsequent diagrams of the stack, we always place element
407 -- zero (the deepest element) at the top of the page, then build the
408 -- stack down on the page with the most recent (top of stack) element
409 -- being the bottom-most entry on the page.
411 -- Stack checking is handled by labeling every pattern with the maximum
412 -- number of stack entries that are required, so a single check at the
413 -- start of matching the pattern suffices. There are two exceptions.
415 -- First, the count does not include entries for recursive pattern
416 -- references. Such recursions must therefore perform a specific
417 -- stack check with respect to the number of stack entries required
418 -- by the recursive pattern that is accessed and the amount of stack
419 -- that remains unused.
421 -- Second, the count includes only one iteration of an Arbno pattern,
422 -- so a specific check must be made on subsequent iterations that there
423 -- is still enough stack space left. The Arbno node has a field that
424 -- records the number of stack entries required by its argument for
427 ---------------------------------------------------
428 -- Use of Serial Index Field in Pattern Elements --
429 ---------------------------------------------------
431 -- The serial index numbers for the pattern elements are assigned as
432 -- a pattern is constructed from its constituent elements. Note that there
433 -- is never any sharing of pattern elements between patterns (copies are
434 -- always made), so the serial index numbers are unique to a particular
435 -- pattern as referenced from the P field of a value of type Pattern.
437 -- The index numbers meet three separate invariants, which are used for
438 -- various purposes as described in this section.
440 -- First, the numbers uniquely identify the pattern elements within a
441 -- pattern. If Num is the number of elements in a given pattern, then
442 -- the serial index numbers for the elements of this pattern will range
443 -- from 1 .. Num, so that each element has a separate value.
445 -- The purpose of this assignment is to provide a convenient auxiliary
446 -- data structure mechanism during operations which must traverse a
447 -- pattern (e.g. copy and finalization processing). Once constructed
448 -- patterns are strictly read only. This is necessary to allow sharing
449 -- of patterns between tasks. This means that we cannot go marking the
450 -- pattern (e.g. with a visited bit). Instead we construct a separate
451 -- vector that contains the necessary information indexed by the Index
452 -- values in the pattern elements. For this purpose the only requirement
453 -- is that they be uniquely assigned.
455 -- Second, the pattern element referenced directly, i.e. the leading
456 -- pattern element, is always the maximum numbered element and therefore
457 -- indicates the total number of elements in the pattern. More precisely,
458 -- the element referenced by the P field of a pattern value, or the
459 -- element returned by any of the internal pattern construction routines
460 -- in the body (that return a value of type PE_Ptr) always is this
463 -- The purpose of this requirement is to allow an immediate determination
464 -- of the number of pattern elements within a pattern. This is used to
465 -- properly size the vectors used to contain auxiliary information for
466 -- traversal as described above.
468 -- Third, as compound pattern structures are constructed, the way in which
469 -- constituent parts of the pattern are constructed is stylized. This is
470 -- an automatic consequence of the way that these compound structures
471 -- are constructed, and basically what we are doing is simply documenting
472 -- and specifying the natural result of the pattern construction. The
473 -- section describing compound pattern structures gives details of the
474 -- numbering of each compound pattern structure.
476 -- The purpose of specifying the stylized numbering structures for the
477 -- compound patterns is to help simplify the processing in the Image
478 -- function, since it eases the task of retrieving the original recursive
479 -- structure of the pattern from the flat graph structure of elements.
480 -- This use in the Image function is the only point at which the code
481 -- makes use of the stylized structures.
483 type Ref_Array is array (IndexT range <>) of PE_Ptr;
484 -- This type is used to build an array whose N'th entry references the
485 -- element in a pattern whose Index value is N. See Build_Ref_Array.
487 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
488 -- Given a pattern element which is the leading element of a pattern
489 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
490 -- Ref_Array so that its N'th entry references the element of the
491 -- referenced pattern whose Index value is N.
493 -------------------------------
494 -- Recursive Pattern Matches --
495 -------------------------------
497 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
498 -- causes a recursive pattern match. This cannot be handled by an actual
499 -- recursive call to the outer level Match routine, since this would not
500 -- allow for possible backtracking into the region matched by the inner
501 -- pattern. Indeed this is the classical clash between recursion and
502 -- backtracking, and a simple recursive stack structure does not suffice.
504 -- This section describes how this recursion and the possible associated
505 -- backtracking is handled. We still use a single stack, but we establish
506 -- the concept of nested regions on this stack, each of which has a stack
507 -- base value pointing to the deepest stack entry of the region. The base
508 -- value for the outer level is zero.
510 -- When a recursive match is established, two special stack entries are
511 -- made. The first entry is used to save the original node that starts
512 -- the recursive match. This is saved so that the successor field of
513 -- this node is accessible at the end of the match, but it is never
514 -- popped and executed.
516 -- The second entry corresponds to a standard new region action. A
517 -- PC_R_Remove node is stacked, whose cursor field is used to store
518 -- the outer stack base, and the stack base is reset to point to
519 -- this PC_R_Remove node. Then the recursive pattern is matched and
520 -- it can make history stack entries in the normal matter, so now
521 -- the stack looks like:
523 -- (stack entries made by outer level)
525 -- (Special entry, node is (+P) successor
526 -- cursor entry is not used)
528 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
529 -- saved base value for the enclosing region)
531 -- (stack entries made by inner level)
533 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
534 -- removes itself and the special entry immediately underneath it,
535 -- restores the stack base value for the enclosing region, and then
536 -- again signals failure to look for alternatives that were stacked
537 -- before the recursion was initiated.
539 -- Now we need to consider what happens if the inner pattern succeeds, as
540 -- signalled by accessing the special PC_EOP pattern primitive. First we
541 -- recognize the nested case by looking at the Base value. If this Base
542 -- value is Stack'First, then the entire match has succeeded, but if the
543 -- base value is greater than Stack'First, then we have successfully
544 -- matched an inner pattern, and processing continues at the outer level.
546 -- There are two cases. The simple case is when the inner pattern has made
547 -- no stack entries, as recognized by the fact that the current stack
548 -- pointer is equal to the current base value. In this case it is fine to
549 -- remove all trace of the recursion by restoring the outer base value and
550 -- using the special entry to find the appropriate successor node.
552 -- The more complex case arises when the inner match does make stack
553 -- entries. In this case, the PC_EOP processing stacks a special entry
554 -- whose cursor value saves the saved inner base value (the one that
555 -- references the corresponding PC_R_Remove value), and whose node
556 -- pointer references a PC_R_Restore node, so the stack looks like:
558 -- (stack entries made by outer level)
560 -- (Special entry, node is (+P) successor,
561 -- cursor entry is not used)
563 -- (PC_R_Remove entry, "cursor" value is (negative)
564 -- saved base value for the enclosing region)
566 -- (stack entries made by inner level)
568 -- (PC_Region_Replace entry, "cursor" value is (negative)
569 -- stack pointer value referencing the PC_R_Remove entry).
571 -- If the entire match succeeds, then these stack entries are, as usual,
572 -- ignored and abandoned. If on the other hand a subsequent failure
573 -- causes the PC_Region_Replace entry to be popped, it restores the
574 -- inner base value from its saved "cursor" value and then fails again.
575 -- Note that it is OK that the cursor is temporarily clobbered by this
576 -- pop, since the second failure will reestablish a proper cursor value.
578 ---------------------------------
579 -- Compound Pattern Structures --
580 ---------------------------------
582 -- This section discusses the compound structures used to represent
583 -- constructed patterns. It shows the graph structures of pattern
584 -- elements that are constructed, and in the case of patterns that
585 -- provide backtracking possibilities, describes how the history
586 -- stack is used to control the backtracking. Finally, it notes the
587 -- way in which the Index numbers are assigned to the structure.
589 -- In all diagrams, solid lines (built with minus signs or vertical
590 -- bars, represent successor pointers (Pthen fields) with > or V used
591 -- to indicate the direction of the pointer. The initial node of the
592 -- structure is in the upper left of the diagram. A dotted line is an
593 -- alternative pointer from the element above it to the element below
594 -- it. See individual sections for details on how alternatives are used.
600 -- In the pattern structures listed in this section, a line that looks
601 -- like ----> with nothing to the right indicates an end of pattern
602 -- (EOP) pointer that represents the end of the match.
604 -- When a pattern concatenation (L & R) occurs, the resulting structure
605 -- is obtained by finding all such EOP pointers in L, and replacing
606 -- them to point to R. This is the most important flattening that
607 -- occurs in constructing a pattern, and it means that the pattern
608 -- matching circuitry does not have to keep track of the structure
609 -- of a pattern with respect to concatenation, since the appropriate
610 -- successor is always at hand.
612 -- Concatenation itself generates no additional possibilities for
613 -- backtracking, but the constituent patterns of the concatenated
614 -- structure will make stack entries as usual. The maximum amount
615 -- of stack required by the structure is thus simply the sum of the
616 -- maximums required by L and R.
618 -- The index numbering of a concatenation structure works by leaving
619 -- the numbering of the right hand pattern, R, unchanged and adjusting
620 -- the numbers in the left hand pattern, L up by the count of elements
621 -- in R. This ensures that the maximum numbered element is the leading
622 -- element as required (given that it was the leading element in L).
628 -- A pattern (L or R) constructs the structure:
631 -- | A |---->| L |---->
639 -- The A element here is a PC_Alt node, and the dotted line represents
640 -- the contents of the Alt field. When the PC_Alt element is matched,
641 -- it stacks a pointer to the leading element of R on the history stack
642 -- so that on subsequent failure, a match of R is attempted.
644 -- The A node is the highest numbered element in the pattern. The
645 -- original index numbers of R are unchanged, but the index numbers
646 -- of the L pattern are adjusted up by the count of elements in R.
648 -- Note that the difference between the index of the L leading element
649 -- the index of the R leading element (after building the alt structure)
650 -- indicates the number of nodes in L, and this is true even after the
651 -- structure is incorporated into some larger structure. For example,
652 -- if the A node has index 16, and L has index 15 and R has index
653 -- 5, then we know that L has 10 (15-5) elements in it.
655 -- Suppose that we now concatenate this structure to another pattern
656 -- with 9 elements in it. We will now have the A node with an index
657 -- of 25, L with an index of 24 and R with an index of 14. We still
658 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
659 -- consequently the successor of the alternation structure has an
660 -- index with a value less than 15. This is used in Image to figure
661 -- out the original recursive structure of a pattern.
663 -- To clarify the interaction of the alternation and concatenation
664 -- structures, here is a more complex example of the structure built
667 -- (V or W or X) (Y or Z)
669 -- where A,B,C,D,E are all single element patterns:
671 -- +---+ +---+ +---+ +---+
672 -- I A I---->I V I---+-->I A I---->I Y I---->
673 -- +---+ +---+ I +---+ +---+
676 -- +---+ +---+ I +---+
677 -- I A I---->I W I-->I I Z I---->
678 -- +---+ +---+ I +---+
682 -- I X I------------>+
685 -- The numbering of the nodes would be as follows:
687 -- +---+ +---+ +---+ +---+
688 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
689 -- +---+ +---+ I +---+ +---+
692 -- +---+ +---+ I +---+
693 -- I 6 I---->I 5 I-->I I 1 I---->
694 -- +---+ +---+ I +---+
698 -- I 4 I------------>+
701 -- Note: The above structure actually corresponds to
703 -- (A or (B or C)) (D or E)
707 -- ((A or B) or C) (D or E)
709 -- which is the more natural interpretation, but in fact alternation
710 -- is associative, and the construction of an alternative changes the
711 -- left grouped pattern to the right grouped pattern in any case, so
712 -- that the Image function produces a more natural looking output.
718 -- An Arb pattern builds the structure
729 -- The X node is a PC_Arb_X node, which matches null, and stacks a
730 -- pointer to Y node, which is the PC_Arb_Y node that matches one
731 -- extra character and restacks itself.
733 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1
735 -------------------------
736 -- Arbno (simple case) --
737 -------------------------
739 -- The simple form of Arbno can be used where the pattern always
740 -- matches at least one character if it succeeds, and it is known
741 -- not to make any history stack entries. In this case, Arbno (P)
742 -- can construct the following structure:
756 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
757 -- pattern P. If a subsequent failure causes P to be matched and
758 -- this match succeeds, then node A gets restacked to try another
759 -- instance if needed by a subsequent failure.
761 -- The node numbering of the constituent pattern P is not affected.
762 -- The S node has a node number of P.Index + 1.
764 --------------------------
765 -- Arbno (complex case) --
766 --------------------------
768 -- A call to Arbno (P), where P can match null (or at least is not
769 -- known to require a non-null string) and/or P requires pattern stack
770 -- entries, constructs the following structure:
772 -- +--------------------------+
780 -- +---+ +---+ +---+ |
781 -- | E |---->| P |---->| Y |--->+
784 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
785 -- E-P-X structure used to match one Arbno instance.
787 -- Here E is the PC_R_Enter node which matches null and creates two
788 -- stack entries. The first is a special entry whose node field is
789 -- not used at all, and whose cursor field has the initial cursor.
791 -- The second entry corresponds to a standard new region action. A
792 -- PC_R_Remove node is stacked, whose cursor field is used to store
793 -- the outer stack base, and the stack base is reset to point to
794 -- this PC_R_Remove node. Then the pattern P is matched, and it can
795 -- make history stack entries in the normal manner, so now the stack
798 -- (stack entries made before assign pattern)
800 -- (Special entry, node field not used,
801 -- used only to save initial cursor)
803 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
804 -- saved base value for the enclosing region)
806 -- (stack entries made by matching P)
808 -- If the match of P fails, then the PC_R_Remove entry is popped and
809 -- it removes both itself and the special entry underneath it,
810 -- restores the outer stack base, and signals failure.
812 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
813 -- the inner region. There are two possibilities. If matching P left
814 -- no stack entries, then all traces of the inner region can be removed.
815 -- If there are stack entries, then we push an PC_Region_Replace stack
816 -- entry whose "cursor" value is the inner stack base value, and then
817 -- restore the outer stack base value, so the stack looks like:
819 -- (stack entries made before assign pattern)
821 -- (Special entry, node field not used,
822 -- used only to save initial cursor)
824 -- (PC_R_Remove entry, "cursor" value is (negative)
825 -- saved base value for the enclosing region)
827 -- (stack entries made by matching P)
829 -- (PC_Region_Replace entry, "cursor" value is (negative)
830 -- stack pointer value referencing the PC_R_Remove entry).
832 -- Now that we have matched another instance of the Arbno pattern,
833 -- we need to move to the successor. There are two cases. If the
834 -- Arbno pattern matched null, then there is no point in seeking
835 -- alternatives, since we would just match a whole bunch of nulls.
836 -- In this case we look through the alternative node, and move
837 -- directly to its successor (i.e. the successor of the Arbno
838 -- pattern). If on the other hand a non-null string was matched,
839 -- we simply follow the successor to the alternative node, which
840 -- sets up for another possible match of the Arbno pattern.
842 -- As noted in the section on stack checking, the stack count (and
843 -- hence the stack check) for a pattern includes only one iteration
844 -- of the Arbno pattern. To make sure that multiple iterations do not
845 -- overflow the stack, the Arbno node saves the stack count required
846 -- by a single iteration, and the Concat function increments this to
847 -- include stack entries required by any successor. The PC_Arbno_Y
848 -- node uses this count to ensure that sufficient stack remains
849 -- before proceeding after matching each new instance.
851 -- The node numbering of the constituent pattern P is not affected.
852 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
853 -- the E node is N + 2, and the X node is N + 3.
855 ----------------------
856 -- Assign Immediate --
857 ----------------------
859 -- Immediate assignment (P * V) constructs the following structure
862 -- | E |---->| P |---->| A |---->
865 -- Here E is the PC_R_Enter node which matches null and creates two
866 -- stack entries. The first is a special entry whose node field is
867 -- not used at all, and whose cursor field has the initial cursor.
869 -- The second entry corresponds to a standard new region action. A
870 -- PC_R_Remove node is stacked, whose cursor field is used to store
871 -- the outer stack base, and the stack base is reset to point to
872 -- this PC_R_Remove node. Then the pattern P is matched, and it can
873 -- make history stack entries in the normal manner, so now the stack
876 -- (stack entries made before assign pattern)
878 -- (Special entry, node field not used,
879 -- used only to save initial cursor)
881 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
882 -- saved base value for the enclosing region)
884 -- (stack entries made by matching P)
886 -- If the match of P fails, then the PC_R_Remove entry is popped
887 -- and it removes both itself and the special entry underneath it,
888 -- restores the outer stack base, and signals failure.
890 -- If the match of P succeeds, then node A, which is the actual
891 -- PC_Assign_Imm node, executes the assignment (using the stack
892 -- base to locate the entry with the saved starting cursor value),
893 -- and the pops the inner region. There are two possibilities, if
894 -- matching P left no stack entries, then all traces of the inner
895 -- region can be removed. If there are stack entries, then we push
896 -- an PC_Region_Replace stack entry whose "cursor" value is the
897 -- inner stack base value, and then restore the outer stack base
898 -- value, so the stack looks like:
900 -- (stack entries made before assign pattern)
902 -- (Special entry, node field not used,
903 -- used only to save initial cursor)
905 -- (PC_R_Remove entry, "cursor" value is (negative)
906 -- saved base value for the enclosing region)
908 -- (stack entries made by matching P)
910 -- (PC_Region_Replace entry, "cursor" value is the (negative)
911 -- stack pointer value referencing the PC_R_Remove entry).
913 -- If a subsequent failure occurs, the PC_Region_Replace node restores
914 -- the inner stack base value and signals failure to explore rematches
917 -- The node numbering of the constituent pattern P is not affected.
918 -- Where N is the number of nodes in P, the A node is numbered N + 1,
919 -- and the E node is N + 2.
921 ---------------------
922 -- Assign On Match --
923 ---------------------
925 -- The assign on match (**) pattern is quite similar to the assign
926 -- immediate pattern, except that the actual assignment has to be
927 -- delayed. The following structure is constructed:
930 -- | E |---->| P |---->| A |---->
933 -- The operation of this pattern is identical to that described above
934 -- for deferred assignment, up to the point where P has been matched.
936 -- The A node, which is the PC_Assign_OnM node first pushes a
937 -- PC_Assign node onto the history stack. This node saves the ending
938 -- cursor and acts as a flag for the final assignment, as further
941 -- It then stores a pointer to itself in the special entry node field.
942 -- This was otherwise unused, and is now used to retrieve the address
943 -- of the variable to be assigned at the end of the pattern.
945 -- After that the inner region is terminated in the usual manner,
946 -- by stacking a PC_R_Restore entry as described for the assign
947 -- immediate case. Note that the optimization of completely
948 -- removing the inner region does not happen in this case, since
949 -- we have at least one stack entry (the PC_Assign one we just made).
950 -- The stack now looks like:
952 -- (stack entries made before assign pattern)
954 -- (Special entry, node points to copy of
955 -- the PC_Assign_OnM node, and the
956 -- cursor field saves the initial cursor).
958 -- (PC_R_Remove entry, "cursor" value is (negative)
959 -- saved base value for the enclosing region)
961 -- (stack entries made by matching P)
963 -- (PC_Assign entry, saves final cursor)
965 -- (PC_Region_Replace entry, "cursor" value is (negative)
966 -- stack pointer value referencing the PC_R_Remove entry).
968 -- If a subsequent failure causes the PC_Assign node to execute it
969 -- simply removes itself and propagates the failure.
971 -- If the match succeeds, then the history stack is scanned for
972 -- PC_Assign nodes, and the assignments are executed (examination
973 -- of the above diagram will show that all the necessary data is
974 -- at hand for the assignment).
976 -- To optimize the common case where no assign-on-match operations
977 -- are present, a global flag Assign_OnM is maintained which is
978 -- initialize to False, and gets set True as part of the execution
979 -- of the PC_Assign_OnM node. The scan of the history stack for
980 -- PC_Assign entries is done only if this flag is set.
982 -- The node numbering of the constituent pattern P is not affected.
983 -- Where N is the number of nodes in P, the A node is numbered N + 1,
984 -- and the E node is N + 2.
990 -- Bal builds a single node:
996 -- The node B is the PC_Bal node which matches a parentheses balanced
997 -- string, starting at the current cursor position. It then updates
998 -- the cursor past this matched string, and stacks a pointer to itself
999 -- with this updated cursor value on the history stack, to extend the
1000 -- matched string on a subsequent failure.
1002 -- Since this is a single node it is numbered 1 (the reason we include
1003 -- it in the compound patterns section is that it backtracks).
1009 -- BreakX builds the structure
1012 -- | B |---->| A |---->
1020 -- Here the B node is the BreakX_xx node that performs a normal Break
1021 -- function. The A node is an alternative (PC_Alt) node that matches
1022 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1023 -- extends the match one character (to eat up the previously detected
1024 -- break character), and then rematches the break.
1026 -- The B node is numbered 3, the alternative node is 1, and the X
1033 -- Fence builds a single node:
1039 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1040 -- PC_Cancel element which will abort the match on a subsequent failure.
1042 -- Since this is a single element it is numbered 1 (the reason we
1043 -- include it in the compound patterns section is that it backtracks).
1045 --------------------
1046 -- Fence Function --
1047 --------------------
1049 -- A call to the Fence function builds the structure:
1051 -- +---+ +---+ +---+
1052 -- | E |---->| P |---->| X |---->
1053 -- +---+ +---+ +---+
1055 -- Here E is the PC_R_Enter node which matches null and creates two
1056 -- stack entries. The first is a special entry which is not used at
1057 -- all in the fence case (it is present merely for uniformity with
1058 -- other cases of region enter operations).
1060 -- The second entry corresponds to a standard new region action. A
1061 -- PC_R_Remove node is stacked, whose cursor field is used to store
1062 -- the outer stack base, and the stack base is reset to point to
1063 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1064 -- make history stack entries in the normal manner, so now the stack
1067 -- (stack entries made before fence pattern)
1069 -- (Special entry, not used at all)
1071 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1072 -- saved base value for the enclosing region)
1074 -- (stack entries made by matching P)
1076 -- If the match of P fails, then the PC_R_Remove entry is popped
1077 -- and it removes both itself and the special entry underneath it,
1078 -- restores the outer stack base, and signals failure.
1080 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1081 -- control. One might be tempted to think that at this point, the
1082 -- history stack entries made by matching P can just be removed since
1083 -- they certainly are not going to be used for rematching (that is
1084 -- whole point of Fence after all!) However, this is wrong, because
1085 -- it would result in the loss of possible assign-on-match entries
1086 -- for deferred pattern assignments.
1088 -- Instead what we do is to make a special entry whose node references
1089 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1090 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1091 -- pointer is restored, so the stack looks like:
1093 -- (stack entries made before assign pattern)
1095 -- (Special entry, not used at all)
1097 -- (PC_R_Remove entry, "cursor" value is (negative)
1098 -- saved base value for the enclosing region)
1100 -- (stack entries made by matching P)
1102 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1103 -- pointer value referencing the PC_R_Remove entry).
1105 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1106 -- the entire inner region, including all entries made by matching P,
1107 -- and alternatives prior to the Fence pattern are sought.
1109 -- The node numbering of the constituent pattern P is not affected.
1110 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1111 -- and the E node is N + 2.
1117 -- Succeed builds a single node:
1123 -- The node S is the PC_Succeed node which matches null, and stacks
1124 -- a pointer to itself on the history stack, so that a subsequent
1125 -- failure repeats the same match.
1127 -- Since this is a single node it is numbered 1 (the reason we include
1128 -- it in the compound patterns section is that it backtracks).
1130 ---------------------
1131 -- Write Immediate --
1132 ---------------------
1134 -- The structure built for a write immediate operation (P * F, where
1135 -- F is a file access value) is:
1137 -- +---+ +---+ +---+
1138 -- | E |---->| P |---->| W |---->
1139 -- +---+ +---+ +---+
1141 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1142 -- handling is identical to that described above for Assign Immediate,
1143 -- except that at the point where a successful match occurs, the matched
1144 -- substring is written to the referenced file.
1146 -- The node numbering of the constituent pattern P is not affected.
1147 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1148 -- and the E node is N + 2.
1150 --------------------
1151 -- Write On Match --
1152 --------------------
1154 -- The structure built for a write on match operation (P ** F, where
1155 -- F is a file access value) is:
1157 -- +---+ +---+ +---+
1158 -- | E |---->| P |---->| W |---->
1159 -- +---+ +---+ +---+
1161 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1162 -- handling is identical to that described above for Assign On Match,
1163 -- except that at the point where a successful match has completed,
1164 -- the matched substring is written to the referenced file.
1166 -- The node numbering of the constituent pattern P is not affected.
1167 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1168 -- and the E node is N + 2.
1169 -----------------------
1170 -- Constant Patterns --
1171 -----------------------
1173 -- The following pattern elements are referenced only from the pattern
1174 -- history stack. In each case the processing for the pattern element
1175 -- results in pattern match abort, or further failure, so there is no
1176 -- need for a successor and no need for a node number
1178 CP_Assign : aliased PE := (PC_Assign, 0, N);
1179 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1180 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1181 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1182 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1184 -----------------------
1185 -- Local Subprograms --
1186 -----------------------
1188 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1189 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1190 -- Build pattern structure corresponding to the alternation of L, R.
1191 -- (i.e. try to match L, and if that fails, try to match R).
1193 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1194 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1195 -- match at least one character if it succeeds and to require no
1196 -- stack entries under all circumstances. The result returned is
1197 -- a simple Arbno structure as previously described.
1199 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1200 -- Given two single node pattern elements E and A, and a (possible
1201 -- complex) pattern P, construct the concatenation E-->P-->A and
1202 -- return a pointer to E. The concatenation does not affect the
1203 -- node numbering in P. A has a number one higher than the maximum
1204 -- number in P, and E has a number two higher than the maximum
1205 -- number in P (see for example the Assign_Immediate structure to
1206 -- understand a typical use of this function).
1208 function BreakX_Make (B : PE_Ptr) return Pattern;
1209 -- Given a pattern element for a Break pattern, returns the
1210 -- corresponding BreakX compound pattern structure.
1212 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1213 -- Creates a pattern element that represents a concatenation of the
1214 -- two given pattern elements (i.e. the pattern L followed by R).
1215 -- The result returned is always the same as L, but the pattern
1216 -- referenced by L is modified to have R as a successor. This
1217 -- procedure does not copy L or R, so if a copy is required, it
1218 -- is the responsibility of the caller. The Incr parameter is an
1219 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1220 -- in the left operand, it represents the additional stack space
1221 -- required by the right operand.
1223 function C_To_PE (C : PChar) return PE_Ptr;
1224 -- Given a character, constructs a pattern element that matches
1225 -- the single character.
1227 function Copy (P : PE_Ptr) return PE_Ptr;
1228 -- Creates a copy of the pattern element referenced by the given
1229 -- pattern element reference. This is a deep copy, which means that
1230 -- it follows the Next and Alt pointers.
1232 function Image (P : PE_Ptr) return String;
1233 -- Returns the image of the address of the referenced pattern element.
1234 -- This is equivalent to Image (To_Address (P));
1236 function Is_In (C : Character; Str : String) return Boolean;
1237 pragma Inline (Is_In);
1238 -- Determines if the character C is in string Str
1240 procedure Logic_Error;
1241 -- Called to raise Program_Error with an appropriate message if an
1242 -- internal logic error is detected.
1244 function Str_BF (A : Boolean_Func) return String;
1245 function Str_FP (A : File_Ptr) return String;
1246 function Str_NF (A : Natural_Func) return String;
1247 function Str_NP (A : Natural_Ptr) return String;
1248 function Str_PP (A : Pattern_Ptr) return String;
1249 function Str_VF (A : VString_Func) return String;
1250 function Str_VP (A : VString_Ptr) return String;
1251 -- These are debugging routines, which return a representation of the
1252 -- given access value (they are called only by Image and Dump)
1254 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1255 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1256 -- are made. In particular, Succ is unchanged, and no index numbers
1257 -- are modified. Note that Pat may not be equal to EOP on entry.
1259 function S_To_PE (Str : PString) return PE_Ptr;
1260 -- Given a string, constructs a pattern element that matches the string
1262 procedure Uninitialized_Pattern;
1263 pragma No_Return (Uninitialized_Pattern);
1264 -- Called to raise Program_Error with an appropriate error message if
1265 -- an uninitialized pattern is used in any pattern construction or
1266 -- pattern matching operation.
1272 Start : out Natural;
1273 Stop : out Natural);
1274 -- This is the common pattern match routine. It is passed a string and
1275 -- a pattern, and it indicates success or failure, and on success the
1276 -- section of the string matched. It does not perform any assignments
1277 -- to the subject string, so pattern replacement is for the caller.
1279 -- Subject The subject string. The lower bound is always one. In the
1280 -- Match procedures, it is fine to use strings whose lower bound
1281 -- is not one, but we perform a one time conversion before the
1282 -- call to XMatch, so that XMatch does not have to be bothered
1283 -- with strange lower bounds.
1285 -- Pat_P Points to initial pattern element of pattern to be matched
1287 -- Pat_S Maximum required stack entries for pattern to be matched
1289 -- Start If match is successful, starting index of matched section.
1290 -- This value is always non-zero. A value of zero is used to
1291 -- indicate a failed match.
1293 -- Stop If match is successful, ending index of matched section.
1294 -- This can be zero if we match the null string at the start,
1295 -- in which case Start is set to zero, and Stop to one. If the
1296 -- Match fails, then the contents of Stop is undefined.
1302 Start : out Natural;
1303 Stop : out Natural);
1304 -- Identical in all respects to XMatch, except that trace information is
1305 -- output on Standard_Output during execution of the match. This is the
1306 -- version that is called if the original Match call has Debug => True.
1312 function "&" (L : PString; R : Pattern) return Pattern is
1314 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1317 function "&" (L : Pattern; R : PString) return Pattern is
1319 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1322 function "&" (L : PChar; R : Pattern) return Pattern is
1324 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1327 function "&" (L : Pattern; R : PChar) return Pattern is
1329 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1332 function "&" (L : Pattern; R : Pattern) return Pattern is
1334 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1343 -- +---+ +---+ +---+
1344 -- | E |---->| P |---->| A |---->
1345 -- +---+ +---+ +---+
1347 -- The node numbering of the constituent pattern P is not affected.
1348 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1349 -- and the E node is N + 2.
1351 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1352 Pat : constant PE_Ptr := Copy (P.P);
1353 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1354 A : constant PE_Ptr :=
1355 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1357 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1360 function "*" (P : PString; Var : VString_Var) return Pattern is
1361 Pat : constant PE_Ptr := S_To_PE (P);
1362 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1363 A : constant PE_Ptr :=
1364 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1366 return (AFC with 3, Bracket (E, Pat, A));
1369 function "*" (P : PChar; Var : VString_Var) return Pattern is
1370 Pat : constant PE_Ptr := C_To_PE (P);
1371 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1372 A : constant PE_Ptr :=
1373 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1375 return (AFC with 3, Bracket (E, Pat, A));
1380 -- +---+ +---+ +---+
1381 -- | E |---->| P |---->| W |---->
1382 -- +---+ +---+ +---+
1384 -- The node numbering of the constituent pattern P is not affected.
1385 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1386 -- and the E node is N + 2.
1388 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1389 Pat : constant PE_Ptr := Copy (P.P);
1390 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1391 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1393 return (AFC with 3, Bracket (E, Pat, W));
1396 function "*" (P : PString; Fil : File_Access) return Pattern is
1397 Pat : constant PE_Ptr := S_To_PE (P);
1398 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1399 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1401 return (AFC with 3, Bracket (E, Pat, W));
1404 function "*" (P : PChar; Fil : File_Access) return Pattern is
1405 Pat : constant PE_Ptr := C_To_PE (P);
1406 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1407 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1409 return (AFC with 3, Bracket (E, Pat, W));
1418 -- +---+ +---+ +---+
1419 -- | E |---->| P |---->| A |---->
1420 -- +---+ +---+ +---+
1422 -- The node numbering of the constituent pattern P is not affected.
1423 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1424 -- and the E node is N + 2.
1426 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1427 Pat : constant PE_Ptr := Copy (P.P);
1428 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1429 A : constant PE_Ptr :=
1430 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1432 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1435 function "**" (P : PString; Var : VString_Var) return Pattern is
1436 Pat : constant PE_Ptr := S_To_PE (P);
1437 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1438 A : constant PE_Ptr :=
1439 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1441 return (AFC with 3, Bracket (E, Pat, A));
1444 function "**" (P : PChar; Var : VString_Var) return Pattern is
1445 Pat : constant PE_Ptr := C_To_PE (P);
1446 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1447 A : constant PE_Ptr :=
1448 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1450 return (AFC with 3, Bracket (E, Pat, A));
1455 -- +---+ +---+ +---+
1456 -- | E |---->| P |---->| W |---->
1457 -- +---+ +---+ +---+
1459 -- The node numbering of the constituent pattern P is not affected.
1460 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1461 -- and the E node is N + 2.
1463 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1464 Pat : constant PE_Ptr := Copy (P.P);
1465 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1466 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1468 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1471 function "**" (P : PString; Fil : File_Access) return Pattern is
1472 Pat : constant PE_Ptr := S_To_PE (P);
1473 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1474 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1476 return (AFC with 3, Bracket (E, Pat, W));
1479 function "**" (P : PChar; Fil : File_Access) return Pattern is
1480 Pat : constant PE_Ptr := C_To_PE (P);
1481 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1482 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1484 return (AFC with 3, Bracket (E, Pat, W));
1491 function "+" (Str : VString_Var) return Pattern is
1495 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1498 function "+" (Str : VString_Func) return Pattern is
1500 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1503 function "+" (P : Pattern_Var) return Pattern is
1507 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1510 function "+" (P : Boolean_Func) return Pattern is
1512 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1519 function "or" (L : PString; R : Pattern) return Pattern is
1521 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1524 function "or" (L : Pattern; R : PString) return Pattern is
1526 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1529 function "or" (L : PString; R : PString) return Pattern is
1531 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1534 function "or" (L : Pattern; R : Pattern) return Pattern is
1537 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1540 function "or" (L : PChar; R : Pattern) return Pattern is
1542 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1545 function "or" (L : Pattern; R : PChar) return Pattern is
1547 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1550 function "or" (L : PChar; R : PChar) return Pattern is
1552 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1555 function "or" (L : PString; R : PChar) return Pattern is
1557 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1560 function "or" (L : PChar; R : PString) return Pattern is
1562 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1569 -- No two patterns share the same pattern elements, so the adjust
1570 -- procedure for a Pattern assignment must do a deep copy of the
1571 -- pattern element structure.
1573 procedure Adjust (Object : in out Pattern) is
1575 Object.P := Copy (Object.P);
1582 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1584 -- If the left pattern is null, then we just add the alternation
1585 -- node with an index one greater than the right hand pattern.
1588 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1590 -- If the left pattern is non-null, then build a reference vector
1591 -- for its elements, and adjust their index values to accommodate
1592 -- the right hand elements. Then add the alternation node.
1596 Refs : Ref_Array (1 .. L.Index);
1599 Build_Ref_Array (L, Refs);
1601 for J in Refs'Range loop
1602 Refs (J).Index := Refs (J).Index + R.Index;
1606 return new PE'(PC_Alt, L.Index + 1, L, R);
1614 function Any (Str : String) return Pattern is
1616 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1619 function Any (Str : VString) return Pattern is
1621 return Any (S (Str));
1624 function Any (Str : Character) return Pattern is
1626 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1629 function Any (Str : Character_Set) return Pattern is
1631 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1634 function Any (Str : not null access VString) return Pattern is
1636 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1639 function Any (Str : VString_Func) return Pattern is
1641 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1657 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1
1659 function Arb return Pattern is
1660 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1661 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1663 return (AFC with 1, X);
1670 function Arbno (P : PString) return Pattern is
1672 if P'Length = 0 then
1673 return (AFC with 0, EOP);
1675 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1679 function Arbno (P : PChar) return Pattern is
1681 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1684 function Arbno (P : Pattern) return Pattern is
1685 Pat : constant PE_Ptr := Copy (P.P);
1689 and then OK_For_Simple_Arbno (Pat.Pcode)
1691 return (AFC with 0, Arbno_Simple (Pat));
1694 -- This is the complex case, either the pattern makes stack entries
1695 -- or it is possible for the pattern to match the null string (more
1696 -- accurately, we don't know that this is not the case).
1698 -- +--------------------------+
1706 -- +---+ +---+ +---+ |
1707 -- | E |---->| P |---->| Y |--->+
1708 -- +---+ +---+ +---+
1710 -- The node numbering of the constituent pattern P is not affected.
1711 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1712 -- the E node is N + 2, and the X node is N + 3.
1715 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1716 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1717 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1718 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1721 X.Index := EPY.Index + 1;
1722 return (AFC with P.Stk + 3, X);
1739 -- | P |---------->+
1742 -- The node numbering of the constituent pattern P is not affected.
1743 -- The S node has a node number of P.Index + 1.
1745 -- Note that we know that P cannot be EOP, because a null pattern
1746 -- does not meet the requirements for simple Arbno.
1748 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1749 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1751 Set_Successor (P, S);
1759 function Bal return Pattern is
1761 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1768 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1777 Set_Successor (P, A);
1778 E.Index := P.Index + 2;
1779 A.Index := P.Index + 1;
1789 function Break (Str : String) return Pattern is
1791 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1794 function Break (Str : VString) return Pattern is
1796 return Break (S (Str));
1799 function Break (Str : Character) return Pattern is
1801 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1804 function Break (Str : Character_Set) return Pattern is
1806 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1809 function Break (Str : not null access VString) return Pattern is
1812 new PE'(PC_Break_VP, 1, EOP, Str.all'Unchecked_Access));
1815 function Break (Str : VString_Func) return Pattern is
1817 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1824 function BreakX (Str : String) return Pattern is
1826 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1829 function BreakX (Str : VString) return Pattern is
1831 return BreakX (S (Str));
1834 function BreakX (Str : Character) return Pattern is
1836 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1839 function BreakX (Str : Character_Set) return Pattern is
1841 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1844 function BreakX (Str : not null access VString) return Pattern is
1846 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1849 function BreakX (Str : VString_Func) return Pattern is
1851 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1859 -- | B |---->| A |---->
1867 -- The B node is numbered 3, the alternative node is 1, and the X
1870 function BreakX_Make (B : PE_Ptr) return Pattern is
1871 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1872 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1875 return (AFC with 2, B);
1878 ---------------------
1879 -- Build_Ref_Array --
1880 ---------------------
1882 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1884 procedure Record_PE (E : PE_Ptr);
1885 -- Record given pattern element if not already recorded in RA,
1886 -- and also record any referenced pattern elements recursively.
1892 procedure Record_PE (E : PE_Ptr) is
1894 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1896 if E = EOP or else RA (E.Index) /= null then
1897 Put_LineD (", nothing to do");
1901 Put_LineD (", recording" & IndexT'Image (E.Index));
1903 Record_PE (E.Pthen);
1905 if E.Pcode in PC_Has_Alt then
1911 -- Start of processing for Build_Ref_Array
1915 Put_LineD ("Entering Build_Ref_Array");
1918 end Build_Ref_Array;
1924 function C_To_PE (C : PChar) return PE_Ptr is
1926 return new PE'(PC_Char, 1, EOP, C);
1933 function Cancel return Pattern is
1935 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1942 -- Concat needs to traverse the left operand performing the following
1945 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1946 -- reset to point to the second operand.
1948 -- b) Any PC_Arbno_Y node has its stack count field incremented
1949 -- by the parameter Incr provided for this purpose.
1951 -- d) Num fields of all pattern elements in the left operand are
1952 -- adjusted to include the elements of the right operand.
1954 -- Note: we do not use Set_Successor in the processing for Concat, since
1955 -- there is no point in doing two traversals, we may as well do everything
1956 -- at the same time.
1958 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1968 Refs : Ref_Array (1 .. L.Index);
1969 -- We build a reference array for L whose N'th element points to
1970 -- the pattern element of L whose original Index value is N.
1975 Build_Ref_Array (L, Refs);
1977 for J in Refs'Range loop
1980 P.Index := P.Index + R.Index;
1982 if P.Pcode = PC_Arbno_Y then
1983 P.Nat := P.Nat + Incr;
1986 if P.Pthen = EOP then
1990 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2004 function Copy (P : PE_Ptr) return PE_Ptr is
2007 Uninitialized_Pattern;
2011 Refs : Ref_Array (1 .. P.Index);
2012 -- References to elements in P, indexed by Index field
2014 Copy : Ref_Array (1 .. P.Index);
2015 -- Holds copies of elements of P, indexed by Index field
2020 Build_Ref_Array (P, Refs);
2022 -- Now copy all nodes
2024 for J in Refs'Range loop
2025 Copy (J) := new PE'(Refs (J).all);
2028 -- Adjust all internal references
2030 for J in Copy'Range loop
2033 -- Adjust successor pointer to point to copy
2035 if E.Pthen /= EOP then
2036 E.Pthen := Copy (E.Pthen.Index);
2039 -- Adjust Alt pointer if there is one to point to copy
2041 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2042 E.Alt := Copy (E.Alt.Index);
2045 -- Copy referenced string
2047 if E.Pcode = PC_String then
2048 E.Str := new String'(E.Str.all);
2052 return Copy (P.Index);
2061 procedure Dump (P : Pattern) is
2063 subtype Count is Ada.Text_IO.Count;
2065 -- Used to keep track of column in dump output
2067 Refs : Ref_Array (1 .. P.P.Index);
2068 -- We build a reference array whose N'th element points to the
2069 -- pattern element whose Index value is N.
2071 Cols : Natural := 2;
2072 -- Number of columns used for pattern numbers, minimum is 2
2076 procedure Write_Node_Id (E : PE_Ptr);
2077 -- Writes out a string identifying the given pattern element
2083 procedure Write_Node_Id (E : PE_Ptr) is
2088 for J in 4 .. Cols loop
2094 Str : String (1 .. Cols);
2095 N : Natural := Natural (E.Index);
2100 for J in reverse Str'Range loop
2101 Str (J) := Character'Val (48 + N mod 10);
2110 -- Start of processing for Dump
2114 Put ("Pattern Dump Output (pattern at " &
2116 ", S = " & Natural'Image (P.Stk) & ')');
2121 while Col < Scol loop
2127 -- If uninitialized pattern, dump line and we are done
2130 Put_Line ("Uninitialized pattern value");
2134 -- If null pattern, just dump it and we are all done
2137 Put_Line ("EOP (null pattern)");
2141 Build_Ref_Array (P.P, Refs);
2143 -- Set number of columns required for node numbers
2145 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2149 -- Now dump the nodes in reverse sequence. We output them in reverse
2150 -- sequence since this corresponds to the natural order used to
2151 -- construct the patterns.
2153 for J in reverse Refs'Range loop
2156 Set_Col (Count (Cols) + 4);
2159 Put (Pattern_Code'Image (E.Pcode));
2161 Set_Col (21 + Count (Cols) + Address_Image_Length);
2162 Write_Node_Id (E.Pthen);
2163 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2171 Write_Node_Id (E.Alt);
2174 Put (Str_PP (E.PP));
2176 when PC_Pred_Func =>
2177 Put (Str_BF (E.BF));
2179 when PC_Assign_Imm |
2188 Put (Str_VP (E.VP));
2192 Put (Str_FP (E.FP));
2195 Put (Image (E.Str.all));
2198 Put (Image (E.Str2));
2201 Put (Image (E.Str3));
2204 Put (Image (E.Str4));
2207 Put (Image (E.Str5));
2210 Put (Image (E.Str6));
2213 Put (Str_NP (E.Var));
2222 Put (''' & E.Char & ''');
2230 Put ('"' & To_Sequence (E.CS) & '"');
2245 Put (Str_NF (E.NF));
2252 Put (Str_NP (E.NP));
2261 Put (Str_VF (E.VF));
2263 when others => null;
2277 function Fail return Pattern is
2279 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2288 function Fence return Pattern is
2290 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2295 -- +---+ +---+ +---+
2296 -- | E |---->| P |---->| X |---->
2297 -- +---+ +---+ +---+
2299 -- The node numbering of the constituent pattern P is not affected.
2300 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2301 -- and the E node is N + 2.
2303 function Fence (P : Pattern) return Pattern is
2304 Pat : constant PE_Ptr := Copy (P.P);
2305 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2306 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2308 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2315 procedure Finalize (Object : in out Pattern) is
2317 procedure Free is new Ada.Unchecked_Deallocation (PE, PE_Ptr);
2318 procedure Free is new Ada.Unchecked_Deallocation (String, String_Ptr);
2321 -- Nothing to do if already freed
2323 if Object.P = null then
2326 -- Otherwise we must free all elements
2330 Refs : Ref_Array (1 .. Object.P.Index);
2331 -- References to elements in pattern to be finalized
2334 Build_Ref_Array (Object.P, Refs);
2336 for J in Refs'Range loop
2337 if Refs (J).Pcode = PC_String then
2338 Free (Refs (J).Str);
2353 function Image (P : PE_Ptr) return String is
2355 return Image (To_Address (P));
2358 function Image (P : Pattern) return String is
2360 return S (Image (P));
2363 function Image (P : Pattern) return VString is
2365 Kill_Ampersand : Boolean := False;
2366 -- Set True to delete next & to be output to Result
2368 Result : VString := Nul;
2369 -- The result is accumulated here, using Append
2371 Refs : Ref_Array (1 .. P.P.Index);
2372 -- We build a reference array whose N'th element points to the
2373 -- pattern element whose Index value is N.
2375 procedure Delete_Ampersand;
2376 -- Deletes the ampersand at the end of Result
2378 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2379 -- E refers to a pattern structure whose successor is given by Succ.
2380 -- This procedure appends to Result a representation of this pattern.
2381 -- The Paren parameter indicates whether parentheses are required if
2382 -- the output is more than one element.
2384 procedure Image_One (E : in out PE_Ptr);
2385 -- E refers to a pattern structure. This procedure appends to Result
2386 -- a representation of the single simple or compound pattern structure
2387 -- at the start of E and updates E to point to its successor.
2389 ----------------------
2390 -- Delete_Ampersand --
2391 ----------------------
2393 procedure Delete_Ampersand is
2394 L : constant Natural := Length (Result);
2397 Delete (Result, L - 1, L);
2399 end Delete_Ampersand;
2405 procedure Image_One (E : in out PE_Ptr) is
2407 ER : PE_Ptr := E.Pthen;
2408 -- Successor set as result in E unless reset
2414 Append (Result, "Cancel");
2416 when PC_Alt => Alt : declare
2418 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2419 -- Number of elements in left pattern of alternation
2421 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2422 -- Number of lowest index in elements of left pattern
2427 -- The successor of the alternation node must have a lower
2428 -- index than any node that is in the left pattern or a
2429 -- higher index than the alternation node itself.
2432 and then ER.Index >= Lowest_In_L
2433 and then ER.Index < E.Index
2438 Append (Result, '(');
2442 Image_Seq (E1.Pthen, ER, False);
2443 Append (Result, " or ");
2445 exit when E1.Pcode /= PC_Alt;
2448 Image_Seq (E1, ER, False);
2449 Append (Result, ')');
2453 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2456 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2459 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2462 Append (Result, "Arb");
2465 Append (Result, "Arbno (");
2466 Image_Seq (E.Alt, E, False);
2467 Append (Result, ')');
2470 Append (Result, "Arbno (");
2471 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2472 Append (Result, ')');
2474 when PC_Assign_Imm =>
2476 Append (Result, "* " & Str_VP (Refs (E.Index).VP));
2478 when PC_Assign_OnM =>
2480 Append (Result, "** " & Str_VP (Refs (E.Index).VP));
2483 Append (Result, "Any ('" & E.Char & "')");
2486 Append (Result, "Bal");
2489 Append (Result, "Break ('" & E.Char & "')");
2492 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2495 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2498 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2500 when PC_BreakX_CH =>
2501 Append (Result, "BreakX ('" & E.Char & "')");
2504 when PC_BreakX_CS =>
2505 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2508 when PC_BreakX_VF =>
2509 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2512 when PC_BreakX_VP =>
2513 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2517 Append (Result, ''' & E.Char & ''');
2520 Append (Result, "Fail");
2523 Append (Result, "Fence");
2526 Append (Result, "Fence (");
2527 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2528 Append (Result, ")");
2529 ER := Refs (E.Index - 1).Pthen;
2532 Append (Result, "Len (" & E.Nat & ')');
2535 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2538 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2540 when PC_NotAny_CH =>
2541 Append (Result, "NotAny ('" & E.Char & "')");
2543 when PC_NotAny_CS =>
2544 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2546 when PC_NotAny_VF =>
2547 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2549 when PC_NotAny_VP =>
2550 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2553 Append (Result, "NSpan ('" & E.Char & "')");
2556 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2559 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2562 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2565 Append (Result, """""");
2568 Append (Result, "Pos (" & E.Nat & ')');
2571 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2574 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2577 Kill_Ampersand := True;
2580 Append (Result, "Rest");
2583 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2585 when PC_Pred_Func =>
2586 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2589 Append (Result, "RPos (" & E.Nat & ')');
2592 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2595 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2598 Append (Result, "RTab (" & E.Nat & ')');
2601 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2604 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2607 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2610 Append (Result, "Span ('" & E.Char & "')");
2613 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2616 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2619 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2622 Append (Result, Image (E.Str.all));
2625 Append (Result, Image (E.Str2));
2628 Append (Result, Image (E.Str3));
2631 Append (Result, Image (E.Str4));
2634 Append (Result, Image (E.Str5));
2637 Append (Result, Image (E.Str6));
2639 when PC_String_VF =>
2640 Append (Result, "(+" & Str_VF (E.VF) & ')');
2642 when PC_String_VP =>
2643 Append (Result, "(+" & Str_VP (E.VP) & ')');
2646 Append (Result, "Succeed");
2649 Append (Result, "Tab (" & E.Nat & ')');
2652 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2655 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2657 when PC_Write_Imm =>
2658 Append (Result, '(');
2659 Image_Seq (E, Refs (E.Index - 1), True);
2660 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2661 ER := Refs (E.Index - 1).Pthen;
2663 when PC_Write_OnM =>
2664 Append (Result, '(');
2665 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2666 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2667 ER := Refs (E.Index - 1).Pthen;
2669 -- Other pattern codes should not appear as leading elements
2680 Append (Result, "???");
2691 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2692 Indx : constant Natural := Length (Result);
2694 Mult : Boolean := False;
2697 -- The image of EOP is "" (the null string)
2700 Append (Result, """""");
2702 -- Else generate appropriate concatenation sequence
2707 exit when E1 = Succ;
2711 if Kill_Ampersand then
2712 Kill_Ampersand := False;
2714 Append (Result, " & ");
2719 if Mult and Paren then
2720 Insert (Result, Indx + 1, "(");
2721 Append (Result, ")");
2725 -- Start of processing for Image
2728 Build_Ref_Array (P.P, Refs);
2729 Image_Seq (P.P, EOP, False);
2737 function Is_In (C : Character; Str : String) return Boolean is
2739 for J in Str'Range loop
2752 function Len (Count : Natural) return Pattern is
2754 -- Note, the following is not just an optimization, it is needed
2755 -- to ensure that Arbno (Len (0)) does not generate an infinite
2756 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2759 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2762 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2766 function Len (Count : Natural_Func) return Pattern is
2768 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2771 function Len (Count : not null access Natural) return Pattern is
2773 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2780 procedure Logic_Error is
2782 raise Program_Error with
2783 "Internal logic error in GNAT.Spitbol.Patterns";
2792 Pat : Pattern) return Boolean
2794 S : Big_String_Access;
2798 pragma Unreferenced (Stop);
2801 Get_String (Subject, S, L);
2804 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2806 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2814 Pat : Pattern) return Boolean
2816 Start, Stop : Natural;
2817 pragma Unreferenced (Stop);
2819 subtype String1 is String (1 .. Subject'Length);
2823 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2825 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2832 (Subject : VString_Var;
2834 Replace : VString) return Boolean
2838 S : Big_String_Access;
2842 Get_String (Subject, S, L);
2845 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2847 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2853 Get_String (Replace, S, L);
2855 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L));
2861 (Subject : VString_Var;
2863 Replace : String) return Boolean
2867 S : Big_String_Access;
2871 Get_String (Subject, S, L);
2874 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2876 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2883 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2892 S : Big_String_Access;
2897 pragma Unreferenced (Start, Stop);
2900 Get_String (Subject, S, L);
2903 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2905 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2913 Start, Stop : Natural;
2914 pragma Unreferenced (Start, Stop);
2916 subtype String1 is String (1 .. Subject'Length);
2920 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2922 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2927 (Subject : in out VString;
2933 S : Big_String_Access;
2937 Get_String (Subject, S, L);
2940 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2942 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2946 Get_String (Replace, S, L);
2947 Replace_Slice (Subject, Start, Stop, S (1 .. L));
2952 (Subject : in out VString;
2958 S : Big_String_Access;
2962 Get_String (Subject, S, L);
2965 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2967 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2971 Replace_Slice (Subject, Start, Stop, Replace);
2977 Pat : PString) return Boolean
2979 Pat_Len : constant Natural := Pat'Length;
2980 S : Big_String_Access;
2984 Get_String (Subject, S, L);
2986 if Anchored_Mode then
2990 return Pat = S (1 .. Pat_Len);
2994 for J in 1 .. L - Pat_Len + 1 loop
2995 if Pat = S (J .. J + (Pat_Len - 1)) then
3006 Pat : PString) return Boolean
3008 Pat_Len : constant Natural := Pat'Length;
3009 Sub_Len : constant Natural := Subject'Length;
3010 SFirst : constant Natural := Subject'First;
3013 if Anchored_Mode then
3014 if Pat_Len > Sub_Len then
3017 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
3021 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3022 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3032 (Subject : VString_Var;
3034 Replace : VString) return Boolean
3038 S : Big_String_Access;
3042 Get_String (Subject, S, L);
3045 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3047 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3053 Get_String (Replace, S, L);
3055 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L));
3061 (Subject : VString_Var;
3063 Replace : String) return Boolean
3067 S : Big_String_Access;
3071 Get_String (Subject, S, L);
3074 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3076 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3083 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3092 S : Big_String_Access;
3097 pragma Unreferenced (Start, Stop);
3100 Get_String (Subject, S, L);
3103 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3105 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3113 Start, Stop : Natural;
3114 pragma Unreferenced (Start, Stop);
3116 subtype String1 is String (1 .. Subject'Length);
3120 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3122 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3127 (Subject : in out VString;
3133 S : Big_String_Access;
3137 Get_String (Subject, S, L);
3140 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3142 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3146 Get_String (Replace, S, L);
3147 Replace_Slice (Subject, Start, Stop, S (1 .. L));
3152 (Subject : in out VString;
3158 S : Big_String_Access;
3162 Get_String (Subject, S, L);
3165 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3167 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3171 Replace_Slice (Subject, Start, Stop, Replace);
3176 (Subject : VString_Var;
3178 Result : Match_Result_Var) return Boolean
3182 S : Big_String_Access;
3186 Get_String (Subject, S, L);
3189 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3191 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3195 Result'Unrestricted_Access.all.Var := null;
3199 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3200 Result'Unrestricted_Access.all.Start := Start;
3201 Result'Unrestricted_Access.all.Stop := Stop;
3207 (Subject : in out VString;
3209 Result : out Match_Result)
3213 S : Big_String_Access;
3217 Get_String (Subject, S, L);
3220 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3222 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3228 Result.Var := Subject'Unrestricted_Access;
3229 Result.Start := Start;
3230 Result.Stop := Stop;
3238 procedure New_LineD is
3240 if Internal_Debug then
3249 function NotAny (Str : String) return Pattern is
3251 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3254 function NotAny (Str : VString) return Pattern is
3256 return NotAny (S (Str));
3259 function NotAny (Str : Character) return Pattern is
3261 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3264 function NotAny (Str : Character_Set) return Pattern is
3266 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3269 function NotAny (Str : not null access VString) return Pattern is
3271 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3274 function NotAny (Str : VString_Func) return Pattern is
3276 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3283 function NSpan (Str : String) return Pattern is
3285 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3288 function NSpan (Str : VString) return Pattern is
3290 return NSpan (S (Str));
3293 function NSpan (Str : Character) return Pattern is
3295 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3298 function NSpan (Str : Character_Set) return Pattern is
3300 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3303 function NSpan (Str : not null access VString) return Pattern is
3305 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3308 function NSpan (Str : VString_Func) return Pattern is
3310 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3317 function Pos (Count : Natural) return Pattern is
3319 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3322 function Pos (Count : Natural_Func) return Pattern is
3324 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3327 function Pos (Count : not null access Natural) return Pattern is
3329 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3336 procedure PutD (Str : String) is
3338 if Internal_Debug then
3347 procedure Put_LineD (Str : String) is
3349 if Internal_Debug then
3359 (Result : in out Match_Result;
3362 S : Big_String_Access;
3366 Get_String (Replace, S, L);
3368 if Result.Var /= null then
3369 Replace_Slice (Result.Var.all, Result.Start, Result.Stop, S (1 .. L));
3378 function Rest return Pattern is
3380 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3387 function Rpos (Count : Natural) return Pattern is
3389 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3392 function Rpos (Count : Natural_Func) return Pattern is
3394 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3397 function Rpos (Count : not null access Natural) return Pattern is
3399 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3406 function Rtab (Count : Natural) return Pattern is
3408 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3411 function Rtab (Count : Natural_Func) return Pattern is
3413 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3416 function Rtab (Count : not null access Natural) return Pattern is
3418 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3425 function S_To_PE (Str : PString) return PE_Ptr is
3426 Len : constant Natural := Str'Length;
3431 return new PE'(PC_Null, 1, EOP);
3434 return new PE'(PC_Char, 1, EOP, Str (Str'First));
3437 return new PE'(PC_String_2, 1, EOP, Str);
3440 return new PE'(PC_String_3, 1, EOP, Str);
3443 return new PE'(PC_String_4, 1, EOP, Str);
3446 return new PE'(PC_String_5, 1, EOP, Str);
3449 return new PE'(PC_String_6, 1, EOP, Str);
3452 return new PE'(PC_String, 1, EOP, new String'(Str));
3461 -- Note: this procedure is not used by the normal concatenation circuit,
3462 -- since other fixups are required on the left operand in this case, and
3463 -- they might as well be done all together.
3465 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3468 Uninitialized_Pattern;
3470 elsif Pat = EOP then
3475 Refs : Ref_Array (1 .. Pat.Index);
3476 -- We build a reference array for L whose N'th element points to
3477 -- the pattern element of L whose original Index value is N.
3482 Build_Ref_Array (Pat, Refs);
3484 for J in Refs'Range loop
3487 if P.Pthen = EOP then
3491 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3503 function Setcur (Var : not null access Natural) return Pattern is
3505 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3512 function Span (Str : String) return Pattern is
3514 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3517 function Span (Str : VString) return Pattern is
3519 return Span (S (Str));
3522 function Span (Str : Character) return Pattern is
3524 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3527 function Span (Str : Character_Set) return Pattern is
3529 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3532 function Span (Str : not null access VString) return Pattern is
3534 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3537 function Span (Str : VString_Func) return Pattern is
3539 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3546 function Str_BF (A : Boolean_Func) return String is
3547 function To_A is new Ada.Unchecked_Conversion (Boolean_Func, Address);
3549 return "BF(" & Image (To_A (A)) & ')';
3556 function Str_FP (A : File_Ptr) return String is
3558 return "FP(" & Image (A.all'Address) & ')';
3565 function Str_NF (A : Natural_Func) return String is
3566 function To_A is new Ada.Unchecked_Conversion (Natural_Func, Address);
3568 return "NF(" & Image (To_A (A)) & ')';
3575 function Str_NP (A : Natural_Ptr) return String is
3577 return "NP(" & Image (A.all'Address) & ')';
3584 function Str_PP (A : Pattern_Ptr) return String is
3586 return "PP(" & Image (A.all'Address) & ')';
3593 function Str_VF (A : VString_Func) return String is
3594 function To_A is new Ada.Unchecked_Conversion (VString_Func, Address);
3596 return "VF(" & Image (To_A (A)) & ')';
3603 function Str_VP (A : VString_Ptr) return String is
3605 return "VP(" & Image (A.all'Address) & ')';
3612 function Succeed return Pattern is
3614 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3621 function Tab (Count : Natural) return Pattern is
3623 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3626 function Tab (Count : Natural_Func) return Pattern is
3628 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3631 function Tab (Count : not null access Natural) return Pattern is
3633 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3636 ---------------------------
3637 -- Uninitialized_Pattern --
3638 ---------------------------
3640 procedure Uninitialized_Pattern is
3642 raise Program_Error with
3643 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern";
3644 end Uninitialized_Pattern;
3654 Start : out Natural;
3658 -- Pointer to current pattern node. Initialized from Pat_P, and then
3659 -- updated as the match proceeds through its constituent elements.
3661 Length : constant Natural := Subject'Length;
3662 -- Length of string (= Subject'Last, since Subject'First is always 1)
3664 Cursor : Integer := 0;
3665 -- If the value is non-negative, then this value is the index showing
3666 -- the current position of the match in the subject string. The next
3667 -- character to be matched is at Subject (Cursor + 1). Note that since
3668 -- our view of the subject string in XMatch always has a lower bound
3669 -- of one, regardless of original bounds, that this definition exactly
3670 -- corresponds to the cursor value as referenced by functions like Pos.
3672 -- If the value is negative, then this is a saved stack pointer,
3673 -- typically a base pointer of an inner or outer region. Cursor
3674 -- temporarily holds such a value when it is popped from the stack
3675 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3676 -- cursor value before the match proceeds (e.g. by propagating the
3677 -- failure and popping a "real" cursor value from the stack.
3679 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3680 -- Dummy pattern element used in the unanchored case
3683 -- The pattern matching failure stack for this call to Match
3685 Stack_Ptr : Stack_Range;
3686 -- Current stack pointer. This points to the top element of the stack
3687 -- that is currently in use. At the outer level this is the special
3688 -- entry placed on the stack according to the anchor mode.
3690 Stack_Init : constant Stack_Range := Stack'First + 1;
3691 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3692 -- initial (Stack'First) element of the stack is not used so that
3693 -- when we pop the last element off, Stack_Ptr is still in range.
3695 Stack_Base : Stack_Range;
3696 -- This value is the stack base value, i.e. the stack pointer for the
3697 -- first history stack entry in the current stack region. See separate
3698 -- section on handling of recursive pattern matches.
3700 Assign_OnM : Boolean := False;
3701 -- Set True if assign-on-match or write-on-match operations may be
3702 -- present in the history stack, which must then be scanned on a
3703 -- successful match.
3705 procedure Pop_Region;
3706 pragma Inline (Pop_Region);
3707 -- Used at the end of processing of an inner region. If the inner
3708 -- region left no stack entries, then all trace of it is removed.
3709 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3710 -- handling of alternatives in the inner region.
3712 procedure Push (Node : PE_Ptr);
3713 pragma Inline (Push);
3714 -- Make entry in pattern matching stack with current cursor value
3716 procedure Push_Region;
3717 pragma Inline (Push_Region);
3718 -- This procedure makes a new region on the history stack. The
3719 -- caller first establishes the special entry on the stack, but
3720 -- does not push the stack pointer. Then this call stacks a
3721 -- PC_Remove_Region node, on top of this entry, using the cursor
3722 -- field of the PC_Remove_Region entry to save the outer level
3723 -- stack base value, and resets the stack base to point to this
3724 -- PC_Remove_Region node.
3730 procedure Pop_Region is
3732 -- If nothing was pushed in the inner region, we can just get
3733 -- rid of it entirely, leaving no traces that it was ever there
3735 if Stack_Ptr = Stack_Base then
3736 Stack_Ptr := Stack_Base - 2;
3737 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3739 -- If stuff was pushed in the inner region, then we have to
3740 -- push a PC_R_Restore node so that we properly handle possible
3741 -- rematches within the region.
3744 Stack_Ptr := Stack_Ptr + 1;
3745 Stack (Stack_Ptr).Cursor := Stack_Base;
3746 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3747 Stack_Base := Stack (Stack_Base).Cursor;
3755 procedure Push (Node : PE_Ptr) is
3757 Stack_Ptr := Stack_Ptr + 1;
3758 Stack (Stack_Ptr).Cursor := Cursor;
3759 Stack (Stack_Ptr).Node := Node;
3766 procedure Push_Region is
3768 Stack_Ptr := Stack_Ptr + 2;
3769 Stack (Stack_Ptr).Cursor := Stack_Base;
3770 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3771 Stack_Base := Stack_Ptr;
3774 -- Start of processing for XMatch
3777 if Pat_P = null then
3778 Uninitialized_Pattern;
3781 -- Check we have enough stack for this pattern. This check deals with
3782 -- every possibility except a match of a recursive pattern, where we
3783 -- make a check at each recursion level.
3785 if Pat_S >= Stack_Size - 1 then
3786 raise Pattern_Stack_Overflow;
3789 -- In anchored mode, the bottom entry on the stack is an abort entry
3791 if Anchored_Mode then
3792 Stack (Stack_Init).Node := CP_Cancel'Access;
3793 Stack (Stack_Init).Cursor := 0;
3795 -- In unanchored more, the bottom entry on the stack references
3796 -- the special pattern element PE_Unanchored, whose Pthen field
3797 -- points to the initial pattern element. The cursor value in this
3798 -- entry is the number of anchor moves so far.
3801 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3802 Stack (Stack_Init).Cursor := 0;
3805 Stack_Ptr := Stack_Init;
3806 Stack_Base := Stack_Ptr;
3811 -----------------------------------------
3812 -- Main Pattern Matching State Control --
3813 -----------------------------------------
3815 -- This is a state machine which uses gotos to change state. The
3816 -- initial state is Match, to initiate the matching of the first
3817 -- element, so the goto Match above starts the match. In the
3818 -- following descriptions, we indicate the global values that
3819 -- are relevant for the state transition.
3821 -- Come here if entire match fails
3828 -- Come here if entire match succeeds
3830 -- Cursor current position in subject string
3833 Start := Stack (Stack_Init).Cursor + 1;
3836 -- Scan history stack for deferred assignments or writes
3839 for S in Stack_Init .. Stack_Ptr loop
3840 if Stack (S).Node = CP_Assign'Access then
3842 Inner_Base : constant Stack_Range :=
3843 Stack (S + 1).Cursor;
3844 Special_Entry : constant Stack_Range :=
3846 Node_OnM : constant PE_Ptr :=
3847 Stack (Special_Entry).Node;
3848 Start : constant Natural :=
3849 Stack (Special_Entry).Cursor + 1;
3850 Stop : constant Natural := Stack (S).Cursor;
3853 if Node_OnM.Pcode = PC_Assign_OnM then
3854 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3856 elsif Node_OnM.Pcode = PC_Write_OnM then
3857 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3869 -- Come here if attempt to match current element fails
3871 -- Stack_Base current stack base
3872 -- Stack_Ptr current stack pointer
3875 Cursor := Stack (Stack_Ptr).Cursor;
3876 Node := Stack (Stack_Ptr).Node;
3877 Stack_Ptr := Stack_Ptr - 1;
3880 -- Come here if attempt to match current element succeeds
3882 -- Cursor current position in subject string
3883 -- Node pointer to node successfully matched
3884 -- Stack_Base current stack base
3885 -- Stack_Ptr current stack pointer
3890 -- Come here to match the next pattern element
3892 -- Cursor current position in subject string
3893 -- Node pointer to node to be matched
3894 -- Stack_Base current stack base
3895 -- Stack_Ptr current stack pointer
3899 --------------------------------------------------
3900 -- Main Pattern Match Element Matching Routines --
3901 --------------------------------------------------
3903 -- Here is the case statement that processes the current node. The
3904 -- processing for each element does one of five things:
3906 -- goto Succeed to move to the successor
3907 -- goto Match_Succeed if the entire match succeeds
3908 -- goto Match_Fail if the entire match fails
3909 -- goto Fail to signal failure of current match
3911 -- Processing is NOT allowed to fall through
3927 -- Any (one character case)
3931 and then Subject (Cursor + 1) = Node.Char
3933 Cursor := Cursor + 1;
3939 -- Any (character set case)
3943 and then Is_In (Subject (Cursor + 1), Node.CS)
3945 Cursor := Cursor + 1;
3951 -- Any (string function case)
3953 when PC_Any_VF => declare
3954 U : constant VString := Node.VF.all;
3955 S : Big_String_Access;
3959 Get_String (U, S, L);
3962 and then Is_In (Subject (Cursor + 1), S (1 .. L))
3964 Cursor := Cursor + 1;
3971 -- Any (string pointer case)
3973 when PC_Any_VP => declare
3974 U : constant VString := Node.VP.all;
3975 S : Big_String_Access;
3979 Get_String (U, S, L);
3982 and then Is_In (Subject (Cursor + 1), S (1 .. L))
3984 Cursor := Cursor + 1;
3991 -- Arb (initial match)
4001 if Cursor < Length then
4002 Cursor := Cursor + 1;
4009 -- Arbno_S (simple Arbno initialize). This is the node that
4010 -- initiates the match of a simple Arbno structure.
4017 -- Arbno_X (Arbno initialize). This is the node that initiates
4018 -- the match of a complex Arbno structure.
4025 -- Arbno_Y (Arbno rematch). This is the node that is executed
4026 -- following successful matching of one instance of a complex
4029 when PC_Arbno_Y => declare
4030 Null_Match : constant Boolean :=
4031 Cursor = Stack (Stack_Base - 1).Cursor;
4036 -- If arbno extension matched null, then immediately fail
4042 -- Here we must do a stack check to make sure enough stack
4043 -- is left. This check will happen once for each instance of
4044 -- the Arbno pattern that is matched. The Nat field of a
4045 -- PC_Arbno pattern contains the maximum stack entries needed
4046 -- for the Arbno with one instance and the successor pattern
4048 if Stack_Ptr + Node.Nat >= Stack'Last then
4049 raise Pattern_Stack_Overflow;
4055 -- Assign. If this node is executed, it means the assign-on-match
4056 -- or write-on-match operation will not happen after all, so we
4057 -- is propagate the failure, removing the PC_Assign node.
4062 -- Assign immediate. This node performs the actual assignment
4064 when PC_Assign_Imm =>
4067 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4071 -- Assign on match. This node sets up for the eventual assignment
4073 when PC_Assign_OnM =>
4074 Stack (Stack_Base - 1).Node := Node;
4075 Push (CP_Assign'Access);
4083 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4086 elsif Subject (Cursor + 1) = '(' then
4088 Paren_Count : Natural := 1;
4092 Cursor := Cursor + 1;
4094 if Cursor >= Length then
4097 elsif Subject (Cursor + 1) = '(' then
4098 Paren_Count := Paren_Count + 1;
4100 elsif Subject (Cursor + 1) = ')' then
4101 Paren_Count := Paren_Count - 1;
4102 exit when Paren_Count = 0;
4108 Cursor := Cursor + 1;
4112 -- Break (one character case)
4115 while Cursor < Length loop
4116 if Subject (Cursor + 1) = Node.Char then
4119 Cursor := Cursor + 1;
4125 -- Break (character set case)
4128 while Cursor < Length loop
4129 if Is_In (Subject (Cursor + 1), Node.CS) then
4132 Cursor := Cursor + 1;
4138 -- Break (string function case)
4140 when PC_Break_VF => declare
4141 U : constant VString := Node.VF.all;
4142 S : Big_String_Access;
4146 Get_String (U, S, L);
4148 while Cursor < Length loop
4149 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4152 Cursor := Cursor + 1;
4159 -- Break (string pointer case)
4161 when PC_Break_VP => declare
4162 U : constant VString := Node.VP.all;
4163 S : Big_String_Access;
4167 Get_String (U, S, L);
4169 while Cursor < Length loop
4170 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4173 Cursor := Cursor + 1;
4180 -- BreakX (one character case)
4182 when PC_BreakX_CH =>
4183 while Cursor < Length loop
4184 if Subject (Cursor + 1) = Node.Char then
4187 Cursor := Cursor + 1;
4193 -- BreakX (character set case)
4195 when PC_BreakX_CS =>
4196 while Cursor < Length loop
4197 if Is_In (Subject (Cursor + 1), Node.CS) then
4200 Cursor := Cursor + 1;
4206 -- BreakX (string function case)
4208 when PC_BreakX_VF => declare
4209 U : constant VString := Node.VF.all;
4210 S : Big_String_Access;
4214 Get_String (U, S, L);
4216 while Cursor < Length loop
4217 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4220 Cursor := Cursor + 1;
4227 -- BreakX (string pointer case)
4229 when PC_BreakX_VP => declare
4230 U : constant VString := Node.VP.all;
4231 S : Big_String_Access;
4235 Get_String (U, S, L);
4237 while Cursor < Length loop
4238 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4241 Cursor := Cursor + 1;
4248 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4249 -- Structures". This node is the alternative that is stacked to
4250 -- skip past the break character and extend the break.
4253 Cursor := Cursor + 1;
4256 -- Character (one character string)
4260 and then Subject (Cursor + 1) = Node.Char
4262 Cursor := Cursor + 1;
4271 if Stack_Base = Stack_Init then
4274 -- End of recursive inner match. See separate section on
4275 -- handing of recursive pattern matches for details.
4278 Node := Stack (Stack_Base - 1).Node;
4288 -- Fence (built in pattern)
4291 Push (CP_Cancel'Access);
4294 -- Fence function node X. This is the node that gets control
4295 -- after a successful match of the fenced pattern.
4298 Stack_Ptr := Stack_Ptr + 1;
4299 Stack (Stack_Ptr).Cursor := Stack_Base;
4300 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4301 Stack_Base := Stack (Stack_Base).Cursor;
4304 -- Fence function node Y. This is the node that gets control on
4305 -- a failure that occurs after the fenced pattern has matched.
4307 -- Note: the Cursor at this stage is actually the inner stack
4308 -- base value. We don't reset this, but we do use it to strip
4309 -- off all the entries made by the fenced pattern.
4312 Stack_Ptr := Cursor - 2;
4315 -- Len (integer case)
4318 if Cursor + Node.Nat > Length then
4321 Cursor := Cursor + Node.Nat;
4325 -- Len (Integer function case)
4327 when PC_Len_NF => declare
4328 N : constant Natural := Node.NF.all;
4330 if Cursor + N > Length then
4333 Cursor := Cursor + N;
4338 -- Len (integer pointer case)
4341 if Cursor + Node.NP.all > Length then
4344 Cursor := Cursor + Node.NP.all;
4348 -- NotAny (one character case)
4350 when PC_NotAny_CH =>
4352 and then Subject (Cursor + 1) /= Node.Char
4354 Cursor := Cursor + 1;
4360 -- NotAny (character set case)
4362 when PC_NotAny_CS =>
4364 and then not Is_In (Subject (Cursor + 1), Node.CS)
4366 Cursor := Cursor + 1;
4372 -- NotAny (string function case)
4374 when PC_NotAny_VF => declare
4375 U : constant VString := Node.VF.all;
4376 S : Big_String_Access;
4380 Get_String (U, S, L);
4384 not Is_In (Subject (Cursor + 1), S (1 .. L))
4386 Cursor := Cursor + 1;
4393 -- NotAny (string pointer case)
4395 when PC_NotAny_VP => declare
4396 U : constant VString := Node.VP.all;
4397 S : Big_String_Access;
4401 Get_String (U, S, L);
4405 not Is_In (Subject (Cursor + 1), S (1 .. L))
4407 Cursor := Cursor + 1;
4414 -- NSpan (one character case)
4417 while Cursor < Length
4418 and then Subject (Cursor + 1) = Node.Char
4420 Cursor := Cursor + 1;
4425 -- NSpan (character set case)
4428 while Cursor < Length
4429 and then Is_In (Subject (Cursor + 1), Node.CS)
4431 Cursor := Cursor + 1;
4436 -- NSpan (string function case)
4438 when PC_NSpan_VF => declare
4439 U : constant VString := Node.VF.all;
4440 S : Big_String_Access;
4444 Get_String (U, S, L);
4446 while Cursor < Length
4447 and then Is_In (Subject (Cursor + 1), S (1 .. L))
4449 Cursor := Cursor + 1;
4455 -- NSpan (string pointer case)
4457 when PC_NSpan_VP => declare
4458 U : constant VString := Node.VP.all;
4459 S : Big_String_Access;
4463 Get_String (U, S, L);
4465 while Cursor < Length
4466 and then Is_In (Subject (Cursor + 1), S (1 .. L))
4468 Cursor := Cursor + 1;
4479 -- Pos (integer case)
4482 if Cursor = Node.Nat then
4488 -- Pos (Integer function case)
4490 when PC_Pos_NF => declare
4491 N : constant Natural := Node.NF.all;
4500 -- Pos (integer pointer case)
4503 if Cursor = Node.NP.all then
4509 -- Predicate function
4511 when PC_Pred_Func =>
4518 -- Region Enter. Initiate new pattern history stack region
4521 Stack (Stack_Ptr + 1).Cursor := Cursor;
4525 -- Region Remove node. This is the node stacked by an R_Enter.
4526 -- It removes the special format stack entry right underneath, and
4527 -- then restores the outer level stack base and signals failure.
4529 -- Note: the cursor value at this stage is actually the (negative)
4530 -- stack base value for the outer level.
4533 Stack_Base := Cursor;
4534 Stack_Ptr := Stack_Ptr - 1;
4537 -- Region restore node. This is the node stacked at the end of an
4538 -- inner level match. Its function is to restore the inner level
4539 -- region, so that alternatives in this region can be sought.
4541 -- Note: the Cursor at this stage is actually the negative of the
4542 -- inner stack base value, which we use to restore the inner region.
4544 when PC_R_Restore =>
4545 Stack_Base := Cursor;
4554 -- Initiate recursive match (pattern pointer case)
4557 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4560 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4561 raise Pattern_Stack_Overflow;
4563 Node := Node.PP.all.P;
4567 -- RPos (integer case)
4570 if Cursor = (Length - Node.Nat) then
4576 -- RPos (integer function case)
4578 when PC_RPos_NF => declare
4579 N : constant Natural := Node.NF.all;
4581 if Length - Cursor = N then
4588 -- RPos (integer pointer case)
4591 if Cursor = (Length - Node.NP.all) then
4597 -- RTab (integer case)
4600 if Cursor <= (Length - Node.Nat) then
4601 Cursor := Length - Node.Nat;
4607 -- RTab (integer function case)
4609 when PC_RTab_NF => declare
4610 N : constant Natural := Node.NF.all;
4612 if Length - Cursor >= N then
4613 Cursor := Length - N;
4620 -- RTab (integer pointer case)
4623 if Cursor <= (Length - Node.NP.all) then
4624 Cursor := Length - Node.NP.all;
4630 -- Cursor assignment
4633 Node.Var.all := Cursor;
4636 -- Span (one character case)
4638 when PC_Span_CH => declare
4644 and then Subject (P + 1) = Node.Char
4657 -- Span (character set case)
4659 when PC_Span_CS => declare
4665 and then Is_In (Subject (P + 1), Node.CS)
4678 -- Span (string function case)
4680 when PC_Span_VF => declare
4681 U : constant VString := Node.VF.all;
4682 S : Big_String_Access;
4687 Get_String (U, S, L);
4691 and then Is_In (Subject (P + 1), S (1 .. L))
4704 -- Span (string pointer case)
4706 when PC_Span_VP => declare
4707 U : constant VString := Node.VP.all;
4708 S : Big_String_Access;
4713 Get_String (U, S, L);
4717 and then Is_In (Subject (P + 1), S (1 .. L))
4730 -- String (two character case)
4733 if (Length - Cursor) >= 2
4734 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4736 Cursor := Cursor + 2;
4742 -- String (three character case)
4745 if (Length - Cursor) >= 3
4746 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4748 Cursor := Cursor + 3;
4754 -- String (four character case)
4757 if (Length - Cursor) >= 4
4758 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4760 Cursor := Cursor + 4;
4766 -- String (five character case)
4769 if (Length - Cursor) >= 5
4770 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4772 Cursor := Cursor + 5;
4778 -- String (six character case)
4781 if (Length - Cursor) >= 6
4782 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4784 Cursor := Cursor + 6;
4790 -- String (case of more than six characters)
4792 when PC_String => declare
4793 Len : constant Natural := Node.Str'Length;
4795 if (Length - Cursor) >= Len
4796 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4798 Cursor := Cursor + Len;
4805 -- String (function case)
4807 when PC_String_VF => declare
4808 U : constant VString := Node.VF.all;
4809 S : Big_String_Access;
4813 Get_String (U, S, L);
4815 if (Length - Cursor) >= L
4816 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
4818 Cursor := Cursor + L;
4825 -- String (pointer case)
4827 when PC_String_VP => declare
4828 U : constant VString := Node.VP.all;
4829 S : Big_String_Access;
4833 Get_String (U, S, L);
4835 if (Length - Cursor) >= L
4836 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
4838 Cursor := Cursor + L;
4851 -- Tab (integer case)
4854 if Cursor <= Node.Nat then
4861 -- Tab (integer function case)
4863 when PC_Tab_NF => declare
4864 N : constant Natural := Node.NF.all;
4874 -- Tab (integer pointer case)
4877 if Cursor <= Node.NP.all then
4878 Cursor := Node.NP.all;
4884 -- Unanchored movement
4886 when PC_Unanchored =>
4888 -- All done if we tried every position
4890 if Cursor > Length then
4893 -- Otherwise extend the anchor point, and restack ourself
4896 Cursor := Cursor + 1;
4901 -- Write immediate. This node performs the actual write
4903 when PC_Write_Imm =>
4906 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4910 -- Write on match. This node sets up for the eventual write
4912 when PC_Write_OnM =>
4913 Stack (Stack_Base - 1).Node := Node;
4914 Push (CP_Assign'Access);
4921 -- We are NOT allowed to fall though this case statement, since every
4922 -- match routine must end by executing a goto to the appropriate point
4923 -- in the finite state machine model.
4925 pragma Warnings (Off);
4927 pragma Warnings (On);
4934 -- Maintenance note: There is a LOT of code duplication between XMatch
4935 -- and XMatchD. This is quite intentional, the point is to avoid any
4936 -- unnecessary debugging overhead in the XMatch case, but this does mean
4937 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4938 -- any major changes, the proper approach is to delete XMatch, make the
4939 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4940 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4947 Start : out Natural;
4951 -- Pointer to current pattern node. Initialized from Pat_P, and then
4952 -- updated as the match proceeds through its constituent elements.
4954 Length : constant Natural := Subject'Length;
4955 -- Length of string (= Subject'Last, since Subject'First is always 1)
4957 Cursor : Integer := 0;
4958 -- If the value is non-negative, then this value is the index showing
4959 -- the current position of the match in the subject string. The next
4960 -- character to be matched is at Subject (Cursor + 1). Note that since
4961 -- our view of the subject string in XMatch always has a lower bound
4962 -- of one, regardless of original bounds, that this definition exactly
4963 -- corresponds to the cursor value as referenced by functions like Pos.
4965 -- If the value is negative, then this is a saved stack pointer,
4966 -- typically a base pointer of an inner or outer region. Cursor
4967 -- temporarily holds such a value when it is popped from the stack
4968 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4969 -- cursor value before the match proceeds (e.g. by propagating the
4970 -- failure and popping a "real" cursor value from the stack.
4972 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4973 -- Dummy pattern element used in the unanchored case
4975 Region_Level : Natural := 0;
4976 -- Keeps track of recursive region level. This is used only for
4977 -- debugging, it is the number of saved history stack base values.
4980 -- The pattern matching failure stack for this call to Match
4982 Stack_Ptr : Stack_Range;
4983 -- Current stack pointer. This points to the top element of the stack
4984 -- that is currently in use. At the outer level this is the special
4985 -- entry placed on the stack according to the anchor mode.
4987 Stack_Init : constant Stack_Range := Stack'First + 1;
4988 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4989 -- initial (Stack'First) element of the stack is not used so that
4990 -- when we pop the last element off, Stack_Ptr is still in range.
4992 Stack_Base : Stack_Range;
4993 -- This value is the stack base value, i.e. the stack pointer for the
4994 -- first history stack entry in the current stack region. See separate
4995 -- section on handling of recursive pattern matches.
4997 Assign_OnM : Boolean := False;
4998 -- Set True if assign-on-match or write-on-match operations may be
4999 -- present in the history stack, which must then be scanned on a
5000 -- successful match.
5002 procedure Dout (Str : String);
5003 -- Output string to standard error with bars indicating region level
5005 procedure Dout (Str : String; A : Character);
5006 -- Calls Dout with the string S ('A')
5008 procedure Dout (Str : String; A : Character_Set);
5009 -- Calls Dout with the string S ("A")
5011 procedure Dout (Str : String; A : Natural);
5012 -- Calls Dout with the string S (A)
5014 procedure Dout (Str : String; A : String);
5015 -- Calls Dout with the string S ("A")
5017 function Img (P : PE_Ptr) return String;
5018 -- Returns a string of the form #nnn where nnn is P.Index
5020 procedure Pop_Region;
5021 pragma Inline (Pop_Region);
5022 -- Used at the end of processing of an inner region. If the inner
5023 -- region left no stack entries, then all trace of it is removed.
5024 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
5025 -- handling of alternatives in the inner region.
5027 procedure Push (Node : PE_Ptr);
5028 pragma Inline (Push);
5029 -- Make entry in pattern matching stack with current cursor value
5031 procedure Push_Region;
5032 pragma Inline (Push_Region);
5033 -- This procedure makes a new region on the history stack. The
5034 -- caller first establishes the special entry on the stack, but
5035 -- does not push the stack pointer. Then this call stacks a
5036 -- PC_Remove_Region node, on top of this entry, using the cursor
5037 -- field of the PC_Remove_Region entry to save the outer level
5038 -- stack base value, and resets the stack base to point to this
5039 -- PC_Remove_Region node.
5045 procedure Dout (Str : String) is
5047 for J in 1 .. Region_Level loop
5054 procedure Dout (Str : String; A : Character) is
5056 Dout (Str & " ('" & A & "')");
5059 procedure Dout (Str : String; A : Character_Set) is
5061 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
5064 procedure Dout (Str : String; A : Natural) is
5066 Dout (Str & " (" & A & ')');
5069 procedure Dout (Str : String; A : String) is
5071 Dout (Str & " (" & Image (A) & ')');
5078 function Img (P : PE_Ptr) return String is
5080 return "#" & Integer (P.Index) & " ";
5087 procedure Pop_Region is
5089 Region_Level := Region_Level - 1;
5091 -- If nothing was pushed in the inner region, we can just get
5092 -- rid of it entirely, leaving no traces that it was ever there
5094 if Stack_Ptr = Stack_Base then
5095 Stack_Ptr := Stack_Base - 2;
5096 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
5098 -- If stuff was pushed in the inner region, then we have to
5099 -- push a PC_R_Restore node so that we properly handle possible
5100 -- rematches within the region.
5103 Stack_Ptr := Stack_Ptr + 1;
5104 Stack (Stack_Ptr).Cursor := Stack_Base;
5105 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5106 Stack_Base := Stack (Stack_Base).Cursor;
5114 procedure Push (Node : PE_Ptr) is
5116 Stack_Ptr := Stack_Ptr + 1;
5117 Stack (Stack_Ptr).Cursor := Cursor;
5118 Stack (Stack_Ptr).Node := Node;
5125 procedure Push_Region is
5127 Region_Level := Region_Level + 1;
5128 Stack_Ptr := Stack_Ptr + 2;
5129 Stack (Stack_Ptr).Cursor := Stack_Base;
5130 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5131 Stack_Base := Stack_Ptr;
5134 -- Start of processing for XMatchD
5138 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5139 Put ("--------------------------------------");
5141 for J in 1 .. Length loop
5146 Put_Line ("subject length = " & Length);
5148 if Pat_P = null then
5149 Uninitialized_Pattern;
5152 -- Check we have enough stack for this pattern. This check deals with
5153 -- every possibility except a match of a recursive pattern, where we
5154 -- make a check at each recursion level.
5156 if Pat_S >= Stack_Size - 1 then
5157 raise Pattern_Stack_Overflow;
5160 -- In anchored mode, the bottom entry on the stack is an abort entry
5162 if Anchored_Mode then
5163 Stack (Stack_Init).Node := CP_Cancel'Access;
5164 Stack (Stack_Init).Cursor := 0;
5166 -- In unanchored more, the bottom entry on the stack references
5167 -- the special pattern element PE_Unanchored, whose Pthen field
5168 -- points to the initial pattern element. The cursor value in this
5169 -- entry is the number of anchor moves so far.
5172 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5173 Stack (Stack_Init).Cursor := 0;
5176 Stack_Ptr := Stack_Init;
5177 Stack_Base := Stack_Ptr;
5182 -----------------------------------------
5183 -- Main Pattern Matching State Control --
5184 -----------------------------------------
5186 -- This is a state machine which uses gotos to change state. The
5187 -- initial state is Match, to initiate the matching of the first
5188 -- element, so the goto Match above starts the match. In the
5189 -- following descriptions, we indicate the global values that
5190 -- are relevant for the state transition.
5192 -- Come here if entire match fails
5195 Dout ("match fails");
5201 -- Come here if entire match succeeds
5203 -- Cursor current position in subject string
5206 Dout ("match succeeds");
5207 Start := Stack (Stack_Init).Cursor + 1;
5209 Dout ("first matched character index = " & Start);
5210 Dout ("last matched character index = " & Stop);
5211 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5213 -- Scan history stack for deferred assignments or writes
5216 for S in Stack'First .. Stack_Ptr loop
5217 if Stack (S).Node = CP_Assign'Access then
5219 Inner_Base : constant Stack_Range :=
5220 Stack (S + 1).Cursor;
5221 Special_Entry : constant Stack_Range :=
5223 Node_OnM : constant PE_Ptr :=
5224 Stack (Special_Entry).Node;
5225 Start : constant Natural :=
5226 Stack (Special_Entry).Cursor + 1;
5227 Stop : constant Natural := Stack (S).Cursor;
5230 if Node_OnM.Pcode = PC_Assign_OnM then
5231 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5233 (Img (Stack (S).Node) &
5234 "deferred assignment of " &
5235 Image (Subject (Start .. Stop)));
5237 elsif Node_OnM.Pcode = PC_Write_OnM then
5238 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5240 (Img (Stack (S).Node) &
5241 "deferred write of " &
5242 Image (Subject (Start .. Stop)));
5255 -- Come here if attempt to match current element fails
5257 -- Stack_Base current stack base
5258 -- Stack_Ptr current stack pointer
5261 Cursor := Stack (Stack_Ptr).Cursor;
5262 Node := Stack (Stack_Ptr).Node;
5263 Stack_Ptr := Stack_Ptr - 1;
5266 Dout ("failure, cursor reset to " & Cursor);
5271 -- Come here if attempt to match current element succeeds
5273 -- Cursor current position in subject string
5274 -- Node pointer to node successfully matched
5275 -- Stack_Base current stack base
5276 -- Stack_Ptr current stack pointer
5279 Dout ("success, cursor = " & Cursor);
5282 -- Come here to match the next pattern element
5284 -- Cursor current position in subject string
5285 -- Node pointer to node to be matched
5286 -- Stack_Base current stack base
5287 -- Stack_Ptr current stack pointer
5291 --------------------------------------------------
5292 -- Main Pattern Match Element Matching Routines --
5293 --------------------------------------------------
5295 -- Here is the case statement that processes the current node. The
5296 -- processing for each element does one of five things:
5298 -- goto Succeed to move to the successor
5299 -- goto Match_Succeed if the entire match succeeds
5300 -- goto Match_Fail if the entire match fails
5301 -- goto Fail to signal failure of current match
5303 -- Processing is NOT allowed to fall through
5310 Dout (Img (Node) & "matching Cancel");
5317 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5322 -- Any (one character case)
5325 Dout (Img (Node) & "matching Any", Node.Char);
5328 and then Subject (Cursor + 1) = Node.Char
5330 Cursor := Cursor + 1;
5336 -- Any (character set case)
5339 Dout (Img (Node) & "matching Any", Node.CS);
5342 and then Is_In (Subject (Cursor + 1), Node.CS)
5344 Cursor := Cursor + 1;
5350 -- Any (string function case)
5352 when PC_Any_VF => declare
5353 U : constant VString := Node.VF.all;
5354 S : Big_String_Access;
5358 Get_String (U, S, L);
5360 Dout (Img (Node) & "matching Any", S (1 .. L));
5363 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5365 Cursor := Cursor + 1;
5372 -- Any (string pointer case)
5374 when PC_Any_VP => declare
5375 U : constant VString := Node.VP.all;
5376 S : Big_String_Access;
5380 Get_String (U, S, L);
5381 Dout (Img (Node) & "matching Any", S (1 .. L));
5384 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5386 Cursor := Cursor + 1;
5393 -- Arb (initial match)
5396 Dout (Img (Node) & "matching Arb");
5404 Dout (Img (Node) & "extending Arb");
5406 if Cursor < Length then
5407 Cursor := Cursor + 1;
5414 -- Arbno_S (simple Arbno initialize). This is the node that
5415 -- initiates the match of a simple Arbno structure.
5419 "setting up Arbno alternative " & Img (Node.Alt));
5424 -- Arbno_X (Arbno initialize). This is the node that initiates
5425 -- the match of a complex Arbno structure.
5429 "setting up Arbno alternative " & Img (Node.Alt));
5434 -- Arbno_Y (Arbno rematch). This is the node that is executed
5435 -- following successful matching of one instance of a complex
5438 when PC_Arbno_Y => declare
5439 Null_Match : constant Boolean :=
5440 Cursor = Stack (Stack_Base - 1).Cursor;
5443 Dout (Img (Node) & "extending Arbno");
5446 -- If arbno extension matched null, then immediately fail
5449 Dout ("Arbno extension matched null, so fails");
5453 -- Here we must do a stack check to make sure enough stack
5454 -- is left. This check will happen once for each instance of
5455 -- the Arbno pattern that is matched. The Nat field of a
5456 -- PC_Arbno pattern contains the maximum stack entries needed
5457 -- for the Arbno with one instance and the successor pattern
5459 if Stack_Ptr + Node.Nat >= Stack'Last then
5460 raise Pattern_Stack_Overflow;
5466 -- Assign. If this node is executed, it means the assign-on-match
5467 -- or write-on-match operation will not happen after all, so we
5468 -- is propagate the failure, removing the PC_Assign node.
5471 Dout (Img (Node) & "deferred assign/write cancelled");
5474 -- Assign immediate. This node performs the actual assignment
5476 when PC_Assign_Imm =>
5478 (Img (Node) & "executing immediate assignment of " &
5479 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5482 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5486 -- Assign on match. This node sets up for the eventual assignment
5488 when PC_Assign_OnM =>
5489 Dout (Img (Node) & "registering deferred assignment");
5490 Stack (Stack_Base - 1).Node := Node;
5491 Push (CP_Assign'Access);
5499 Dout (Img (Node) & "matching or extending Bal");
5500 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5503 elsif Subject (Cursor + 1) = '(' then
5505 Paren_Count : Natural := 1;
5509 Cursor := Cursor + 1;
5511 if Cursor >= Length then
5514 elsif Subject (Cursor + 1) = '(' then
5515 Paren_Count := Paren_Count + 1;
5517 elsif Subject (Cursor + 1) = ')' then
5518 Paren_Count := Paren_Count - 1;
5519 exit when Paren_Count = 0;
5525 Cursor := Cursor + 1;
5529 -- Break (one character case)
5532 Dout (Img (Node) & "matching Break", Node.Char);
5534 while Cursor < Length loop
5535 if Subject (Cursor + 1) = Node.Char then
5538 Cursor := Cursor + 1;
5544 -- Break (character set case)
5547 Dout (Img (Node) & "matching Break", Node.CS);
5549 while Cursor < Length loop
5550 if Is_In (Subject (Cursor + 1), Node.CS) then
5553 Cursor := Cursor + 1;
5559 -- Break (string function case)
5561 when PC_Break_VF => declare
5562 U : constant VString := Node.VF.all;
5563 S : Big_String_Access;
5567 Get_String (U, S, L);
5568 Dout (Img (Node) & "matching Break", S (1 .. L));
5570 while Cursor < Length loop
5571 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5574 Cursor := Cursor + 1;
5581 -- Break (string pointer case)
5583 when PC_Break_VP => declare
5584 U : constant VString := Node.VP.all;
5585 S : Big_String_Access;
5589 Get_String (U, S, L);
5590 Dout (Img (Node) & "matching Break", S (1 .. L));
5592 while Cursor < Length loop
5593 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5596 Cursor := Cursor + 1;
5603 -- BreakX (one character case)
5605 when PC_BreakX_CH =>
5606 Dout (Img (Node) & "matching BreakX", Node.Char);
5608 while Cursor < Length loop
5609 if Subject (Cursor + 1) = Node.Char then
5612 Cursor := Cursor + 1;
5618 -- BreakX (character set case)
5620 when PC_BreakX_CS =>
5621 Dout (Img (Node) & "matching BreakX", Node.CS);
5623 while Cursor < Length loop
5624 if Is_In (Subject (Cursor + 1), Node.CS) then
5627 Cursor := Cursor + 1;
5633 -- BreakX (string function case)
5635 when PC_BreakX_VF => declare
5636 U : constant VString := Node.VF.all;
5637 S : Big_String_Access;
5641 Get_String (U, S, L);
5642 Dout (Img (Node) & "matching BreakX", S (1 .. L));
5644 while Cursor < Length loop
5645 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5648 Cursor := Cursor + 1;
5655 -- BreakX (string pointer case)
5657 when PC_BreakX_VP => declare
5658 U : constant VString := Node.VP.all;
5659 S : Big_String_Access;
5663 Get_String (U, S, L);
5664 Dout (Img (Node) & "matching BreakX", S (1 .. L));
5666 while Cursor < Length loop
5667 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5670 Cursor := Cursor + 1;
5677 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5678 -- Structures". This node is the alternative that is stacked
5679 -- to skip past the break character and extend the break.
5682 Dout (Img (Node) & "extending BreakX");
5683 Cursor := Cursor + 1;
5686 -- Character (one character string)
5689 Dout (Img (Node) & "matching '" & Node.Char & ''');
5692 and then Subject (Cursor + 1) = Node.Char
5694 Cursor := Cursor + 1;
5703 if Stack_Base = Stack_Init then
5704 Dout ("end of pattern");
5707 -- End of recursive inner match. See separate section on
5708 -- handing of recursive pattern matches for details.
5711 Dout ("terminating recursive match");
5712 Node := Stack (Stack_Base - 1).Node;
5720 Dout (Img (Node) & "matching Fail");
5723 -- Fence (built in pattern)
5726 Dout (Img (Node) & "matching Fence");
5727 Push (CP_Cancel'Access);
5730 -- Fence function node X. This is the node that gets control
5731 -- after a successful match of the fenced pattern.
5734 Dout (Img (Node) & "matching Fence function");
5735 Stack_Ptr := Stack_Ptr + 1;
5736 Stack (Stack_Ptr).Cursor := Stack_Base;
5737 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5738 Stack_Base := Stack (Stack_Base).Cursor;
5739 Region_Level := Region_Level - 1;
5742 -- Fence function node Y. This is the node that gets control on
5743 -- a failure that occurs after the fenced pattern has matched.
5745 -- Note: the Cursor at this stage is actually the inner stack
5746 -- base value. We don't reset this, but we do use it to strip
5747 -- off all the entries made by the fenced pattern.
5750 Dout (Img (Node) & "pattern matched by Fence caused failure");
5751 Stack_Ptr := Cursor - 2;
5754 -- Len (integer case)
5757 Dout (Img (Node) & "matching Len", Node.Nat);
5759 if Cursor + Node.Nat > Length then
5762 Cursor := Cursor + Node.Nat;
5766 -- Len (Integer function case)
5768 when PC_Len_NF => declare
5769 N : constant Natural := Node.NF.all;
5772 Dout (Img (Node) & "matching Len", N);
5774 if Cursor + N > Length then
5777 Cursor := Cursor + N;
5782 -- Len (integer pointer case)
5785 Dout (Img (Node) & "matching Len", Node.NP.all);
5787 if Cursor + Node.NP.all > Length then
5790 Cursor := Cursor + Node.NP.all;
5794 -- NotAny (one character case)
5796 when PC_NotAny_CH =>
5797 Dout (Img (Node) & "matching NotAny", Node.Char);
5800 and then Subject (Cursor + 1) /= Node.Char
5802 Cursor := Cursor + 1;
5808 -- NotAny (character set case)
5810 when PC_NotAny_CS =>
5811 Dout (Img (Node) & "matching NotAny", Node.CS);
5814 and then not Is_In (Subject (Cursor + 1), Node.CS)
5816 Cursor := Cursor + 1;
5822 -- NotAny (string function case)
5824 when PC_NotAny_VF => declare
5825 U : constant VString := Node.VF.all;
5826 S : Big_String_Access;
5830 Get_String (U, S, L);
5831 Dout (Img (Node) & "matching NotAny", S (1 .. L));
5835 not Is_In (Subject (Cursor + 1), S (1 .. L))
5837 Cursor := Cursor + 1;
5844 -- NotAny (string pointer case)
5846 when PC_NotAny_VP => declare
5847 U : constant VString := Node.VP.all;
5848 S : Big_String_Access;
5852 Get_String (U, S, L);
5853 Dout (Img (Node) & "matching NotAny", S (1 .. L));
5857 not Is_In (Subject (Cursor + 1), S (1 .. L))
5859 Cursor := Cursor + 1;
5866 -- NSpan (one character case)
5869 Dout (Img (Node) & "matching NSpan", Node.Char);
5871 while Cursor < Length
5872 and then Subject (Cursor + 1) = Node.Char
5874 Cursor := Cursor + 1;
5879 -- NSpan (character set case)
5882 Dout (Img (Node) & "matching NSpan", Node.CS);
5884 while Cursor < Length
5885 and then Is_In (Subject (Cursor + 1), Node.CS)
5887 Cursor := Cursor + 1;
5892 -- NSpan (string function case)
5894 when PC_NSpan_VF => declare
5895 U : constant VString := Node.VF.all;
5896 S : Big_String_Access;
5900 Get_String (U, S, L);
5901 Dout (Img (Node) & "matching NSpan", S (1 .. L));
5903 while Cursor < Length
5904 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5906 Cursor := Cursor + 1;
5912 -- NSpan (string pointer case)
5914 when PC_NSpan_VP => declare
5915 U : constant VString := Node.VP.all;
5916 S : Big_String_Access;
5920 Get_String (U, S, L);
5921 Dout (Img (Node) & "matching NSpan", S (1 .. L));
5923 while Cursor < Length
5924 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5926 Cursor := Cursor + 1;
5933 Dout (Img (Node) & "matching null");
5936 -- Pos (integer case)
5939 Dout (Img (Node) & "matching Pos", Node.Nat);
5941 if Cursor = Node.Nat then
5947 -- Pos (Integer function case)
5949 when PC_Pos_NF => declare
5950 N : constant Natural := Node.NF.all;
5953 Dout (Img (Node) & "matching Pos", N);
5962 -- Pos (integer pointer case)
5965 Dout (Img (Node) & "matching Pos", Node.NP.all);
5967 if Cursor = Node.NP.all then
5973 -- Predicate function
5975 when PC_Pred_Func =>
5976 Dout (Img (Node) & "matching predicate function");
5984 -- Region Enter. Initiate new pattern history stack region
5987 Dout (Img (Node) & "starting match of nested pattern");
5988 Stack (Stack_Ptr + 1).Cursor := Cursor;
5992 -- Region Remove node. This is the node stacked by an R_Enter.
5993 -- It removes the special format stack entry right underneath, and
5994 -- then restores the outer level stack base and signals failure.
5996 -- Note: the cursor value at this stage is actually the (negative)
5997 -- stack base value for the outer level.
6000 Dout ("failure, match of nested pattern terminated");
6001 Stack_Base := Cursor;
6002 Region_Level := Region_Level - 1;
6003 Stack_Ptr := Stack_Ptr - 1;
6006 -- Region restore node. This is the node stacked at the end of an
6007 -- inner level match. Its function is to restore the inner level
6008 -- region, so that alternatives in this region can be sought.
6010 -- Note: the Cursor at this stage is actually the negative of the
6011 -- inner stack base value, which we use to restore the inner region.
6013 when PC_R_Restore =>
6014 Dout ("failure, search for alternatives in nested pattern");
6015 Region_Level := Region_Level + 1;
6016 Stack_Base := Cursor;
6022 Dout (Img (Node) & "matching Rest");
6026 -- Initiate recursive match (pattern pointer case)
6029 Stack (Stack_Ptr + 1).Node := Node.Pthen;
6031 Dout (Img (Node) & "initiating recursive match");
6033 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
6034 raise Pattern_Stack_Overflow;
6036 Node := Node.PP.all.P;
6040 -- RPos (integer case)
6043 Dout (Img (Node) & "matching RPos", Node.Nat);
6045 if Cursor = (Length - Node.Nat) then
6051 -- RPos (integer function case)
6053 when PC_RPos_NF => declare
6054 N : constant Natural := Node.NF.all;
6057 Dout (Img (Node) & "matching RPos", N);
6059 if Length - Cursor = N then
6066 -- RPos (integer pointer case)
6069 Dout (Img (Node) & "matching RPos", Node.NP.all);
6071 if Cursor = (Length - Node.NP.all) then
6077 -- RTab (integer case)
6080 Dout (Img (Node) & "matching RTab", Node.Nat);
6082 if Cursor <= (Length - Node.Nat) then
6083 Cursor := Length - Node.Nat;
6089 -- RTab (integer function case)
6091 when PC_RTab_NF => declare
6092 N : constant Natural := Node.NF.all;
6095 Dout (Img (Node) & "matching RPos", N);
6097 if Length - Cursor >= N then
6098 Cursor := Length - N;
6105 -- RTab (integer pointer case)
6108 Dout (Img (Node) & "matching RPos", Node.NP.all);
6110 if Cursor <= (Length - Node.NP.all) then
6111 Cursor := Length - Node.NP.all;
6117 -- Cursor assignment
6120 Dout (Img (Node) & "matching Setcur");
6121 Node.Var.all := Cursor;
6124 -- Span (one character case)
6126 when PC_Span_CH => declare
6127 P : Natural := Cursor;
6130 Dout (Img (Node) & "matching Span", Node.Char);
6133 and then Subject (P + 1) = Node.Char
6146 -- Span (character set case)
6148 when PC_Span_CS => declare
6149 P : Natural := Cursor;
6152 Dout (Img (Node) & "matching Span", Node.CS);
6155 and then Is_In (Subject (P + 1), Node.CS)
6168 -- Span (string function case)
6170 when PC_Span_VF => declare
6171 U : constant VString := Node.VF.all;
6172 S : Big_String_Access;
6177 Get_String (U, S, L);
6178 Dout (Img (Node) & "matching Span", S (1 .. L));
6182 and then Is_In (Subject (P + 1), S (1 .. L))
6195 -- Span (string pointer case)
6197 when PC_Span_VP => declare
6198 U : constant VString := Node.VP.all;
6199 S : Big_String_Access;
6204 Get_String (U, S, L);
6205 Dout (Img (Node) & "matching Span", S (1 .. L));
6209 and then Is_In (Subject (P + 1), S (1 .. L))
6222 -- String (two character case)
6225 Dout (Img (Node) & "matching " & Image (Node.Str2));
6227 if (Length - Cursor) >= 2
6228 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6230 Cursor := Cursor + 2;
6236 -- String (three character case)
6239 Dout (Img (Node) & "matching " & Image (Node.Str3));
6241 if (Length - Cursor) >= 3
6242 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6244 Cursor := Cursor + 3;
6250 -- String (four character case)
6253 Dout (Img (Node) & "matching " & Image (Node.Str4));
6255 if (Length - Cursor) >= 4
6256 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6258 Cursor := Cursor + 4;
6264 -- String (five character case)
6267 Dout (Img (Node) & "matching " & Image (Node.Str5));
6269 if (Length - Cursor) >= 5
6270 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6272 Cursor := Cursor + 5;
6278 -- String (six character case)
6281 Dout (Img (Node) & "matching " & Image (Node.Str6));
6283 if (Length - Cursor) >= 6
6284 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6286 Cursor := Cursor + 6;
6292 -- String (case of more than six characters)
6294 when PC_String => declare
6295 Len : constant Natural := Node.Str'Length;
6298 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6300 if (Length - Cursor) >= Len
6301 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6303 Cursor := Cursor + Len;
6310 -- String (function case)
6312 when PC_String_VF => declare
6313 U : constant VString := Node.VF.all;
6314 S : Big_String_Access;
6318 Get_String (U, S, L);
6319 Dout (Img (Node) & "matching " & Image (S (1 .. L)));
6321 if (Length - Cursor) >= L
6322 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
6324 Cursor := Cursor + L;
6331 -- String (vstring pointer case)
6333 when PC_String_VP => declare
6334 U : constant VString := Node.VP.all;
6335 S : Big_String_Access;
6339 Get_String (U, S, L);
6340 Dout (Img (Node) & "matching " & Image (S (1 .. L)));
6342 if (Length - Cursor) >= L
6343 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
6345 Cursor := Cursor + L;
6355 Dout (Img (Node) & "matching Succeed");
6359 -- Tab (integer case)
6362 Dout (Img (Node) & "matching Tab", Node.Nat);
6364 if Cursor <= Node.Nat then
6371 -- Tab (integer function case)
6373 when PC_Tab_NF => declare
6374 N : constant Natural := Node.NF.all;
6377 Dout (Img (Node) & "matching Tab ", N);
6387 -- Tab (integer pointer case)
6390 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6392 if Cursor <= Node.NP.all then
6393 Cursor := Node.NP.all;
6399 -- Unanchored movement
6401 when PC_Unanchored =>
6402 Dout ("attempting to move anchor point");
6404 -- All done if we tried every position
6406 if Cursor > Length then
6409 -- Otherwise extend the anchor point, and restack ourself
6412 Cursor := Cursor + 1;
6417 -- Write immediate. This node performs the actual write
6419 when PC_Write_Imm =>
6420 Dout (Img (Node) & "executing immediate write of " &
6421 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6425 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6429 -- Write on match. This node sets up for the eventual write
6431 when PC_Write_OnM =>
6432 Dout (Img (Node) & "registering deferred write");
6433 Stack (Stack_Base - 1).Node := Node;
6434 Push (CP_Assign'Access);
6441 -- We are NOT allowed to fall though this case statement, since every
6442 -- match routine must end by executing a goto to the appropriate point
6443 -- in the finite state machine model.
6445 pragma Warnings (Off);
6447 pragma Warnings (On);
6450 end GNAT.Spitbol.Patterns;