1 ------------------------------------------------------------------------------
3 -- GNAT LIBRARY COMPONENTS --
5 -- G N A T . S P I T B O L . P A T T E R N S --
9 -- Copyright (C) 1998-2007, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 -- Note: the data structures and general approach used in this implementation
35 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
36 -- a direct translation, but the approach is followed closely. In particular,
37 -- we use the one stack approach developed in the SPITBOL implementation.
39 with Ada.Exceptions; use Ada.Exceptions;
40 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
42 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
44 with System; use System;
46 with Ada.Unchecked_Conversion;
47 with Ada.Unchecked_Deallocation;
49 package body GNAT.Spitbol.Patterns is
51 ------------------------
52 -- Internal Debugging --
53 ------------------------
55 Internal_Debug : constant Boolean := False;
56 -- Set this flag to True to activate some built-in debugging traceback
57 -- These are all lines output with PutD and Put_LineD.
60 pragma Inline (New_LineD);
61 -- Output new blank line with New_Line if Internal_Debug is True
63 procedure PutD (Str : String);
65 -- Output string with Put if Internal_Debug is True
67 procedure Put_LineD (Str : String);
68 pragma Inline (Put_LineD);
69 -- Output string with Put_Line if Internal_Debug is True
71 -----------------------------
72 -- Local Type Declarations --
73 -----------------------------
75 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
76 subtype File_Ptr is Ada.Text_IO.File_Access;
78 function To_Address is new Ada.Unchecked_Conversion (PE_Ptr, Address);
79 -- Used only for debugging output purposes
81 subtype AFC is Ada.Finalization.Controlled;
83 N : constant PE_Ptr := null;
84 -- Shorthand used to initialize Copy fields to null
86 type Natural_Ptr is access all Natural;
87 type Pattern_Ptr is access all Pattern;
89 --------------------------------------------------
90 -- Description of Algorithm and Data Structures --
91 --------------------------------------------------
93 -- A pattern structure is represented as a linked graph of nodes
94 -- with the following structure:
96 -- +------------------------------------+
98 -- +------------------------------------+
100 -- +------------------------------------+
102 -- +------------------------------------+
104 -- +------------------------------------+
106 -- Pcode is a code value indicating the type of the patterm node. This
107 -- code is used both as the discriminant value for the record, and as
108 -- the case index in the main match routine that branches to the proper
109 -- match code for the given element.
111 -- Index is a serial index number. The use of these serial index
112 -- numbers is described in a separate section.
114 -- Pthen is a pointer to the successor node, i.e the node to be matched
115 -- if the attempt to match the node succeeds. If this is the last node
116 -- of the pattern to be matched, then Pthen points to a dummy node
117 -- of kind PC_EOP (end of pattern), which initiales pattern exit.
119 -- The parameter or parameters are present for certain node types,
120 -- and the type varies with the pattern code.
122 type Pattern_Code is (
215 type IndexT is range 0 .. +(2 **15 - 1);
217 type PE (Pcode : Pattern_Code) is record
220 -- Serial index number of pattern element within pattern
223 -- Successor element, to be matched after this one
243 PC_Unanchored => null;
248 PC_Arbno_X => Alt : PE_Ptr;
250 when PC_Rpat => PP : Pattern_Ptr;
252 when PC_Pred_Func => BF : Boolean_Func;
262 PC_String_VP => VP : VString_Ptr;
265 PC_Write_OnM => FP : File_Ptr;
267 when PC_String => Str : String_Ptr;
269 when PC_String_2 => Str2 : String (1 .. 2);
271 when PC_String_3 => Str3 : String (1 .. 3);
273 when PC_String_4 => Str4 : String (1 .. 4);
275 when PC_String_5 => Str5 : String (1 .. 5);
277 when PC_String_6 => Str6 : String (1 .. 6);
279 when PC_Setcur => Var : Natural_Ptr;
287 PC_Span_CH => Char : Character;
294 PC_Span_CS => CS : Character_Set;
301 PC_Tab_Nat => Nat : Natural;
307 PC_Tab_NF => NF : Natural_Func;
313 PC_Tab_NP => NP : Natural_Ptr;
321 PC_String_VF => VF : VString_Func;
326 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
327 -- Range of pattern codes that has an Alt field. This is used in the
328 -- recursive traversals, since these links must be followed.
330 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
331 -- This is the end of pattern element, and is thus the representation of
332 -- a null pattern. It has a zero index element since it is never placed
333 -- inside a pattern. Furthermore it does not need a successor, since it
334 -- marks the end of the pattern, so that no more successors are needed.
336 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
337 -- This is the end of pattern pointer, that is used in the Pthen pointer
338 -- of other nodes to signal end of pattern.
340 -- The following array is used to determine if a pattern used as an
341 -- argument for Arbno is eligible for treatment using the simple Arbno
342 -- structure (i.e. it is a pattern that is guaranteed to match at least
343 -- one character on success, and not to make any entries on the stack.
345 OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean :=
368 -------------------------------
369 -- The Pattern History Stack --
370 -------------------------------
372 -- The pattern history stack is used for controlling backtracking when
373 -- a match fails. The idea is to stack entries that give a cursor value
374 -- to be restored, and a node to be reestablished as the current node to
375 -- attempt an appropriate rematch operation. The processing for a pattern
376 -- element that has rematch alternatives pushes an appropriate entry or
377 -- entry on to the stack, and the proceeds. If a match fails at any point,
378 -- the top element of the stack is popped off, resetting the cursor and
379 -- the match continues by accessing the node stored with this entry.
381 type Stack_Entry is record
384 -- Saved cursor value that is restored when this entry is popped
385 -- from the stack if a match attempt fails. Occasionally, this
386 -- field is used to store a history stack pointer instead of a
387 -- cursor. Such cases are noted in the documentation and the value
388 -- stored is negative since stack pointer values are always negative.
391 -- This pattern element reference is reestablished as the current
392 -- Node to be matched (which will attempt an appropriate rematch).
396 subtype Stack_Range is Integer range -Stack_Size .. -1;
398 type Stack_Type is array (Stack_Range) of Stack_Entry;
399 -- The type used for a history stack. The actual instance of the stack
400 -- is declared as a local variable in the Match routine, to properly
401 -- handle recursive calls to Match. All stack pointer values are negative
402 -- to distinguish them from normal cursor values.
404 -- Note: the pattern matching stack is used only to handle backtracking.
405 -- If no backtracking occurs, its entries are never accessed, and never
406 -- popped off, and in particular it is normal for a successful match
407 -- to terminate with entries on the stack that are simply discarded.
409 -- Note: in subsequent diagrams of the stack, we always place element
410 -- zero (the deepest element) at the top of the page, then build the
411 -- stack down on the page with the most recent (top of stack) element
412 -- being the bottom-most entry on the page.
414 -- Stack checking is handled by labeling every pattern with the maximum
415 -- number of stack entries that are required, so a single check at the
416 -- start of matching the pattern suffices. There are two exceptions.
418 -- First, the count does not include entries for recursive pattern
419 -- references. Such recursions must therefore perform a specific
420 -- stack check with respect to the number of stack entries required
421 -- by the recursive pattern that is accessed and the amount of stack
422 -- that remains unused.
424 -- Second, the count includes only one iteration of an Arbno pattern,
425 -- so a specific check must be made on subsequent iterations that there
426 -- is still enough stack space left. The Arbno node has a field that
427 -- records the number of stack entries required by its argument for
430 ---------------------------------------------------
431 -- Use of Serial Index Field in Pattern Elements --
432 ---------------------------------------------------
434 -- The serial index numbers for the pattern elements are assigned as
435 -- a pattern is consructed from its constituent elements. Note that there
436 -- is never any sharing of pattern elements between patterns (copies are
437 -- always made), so the serial index numbers are unique to a particular
438 -- pattern as referenced from the P field of a value of type Pattern.
440 -- The index numbers meet three separate invariants, which are used for
441 -- various purposes as described in this section.
443 -- First, the numbers uniquely identify the pattern elements within a
444 -- pattern. If Num is the number of elements in a given pattern, then
445 -- the serial index numbers for the elements of this pattern will range
446 -- from 1 .. Num, so that each element has a separate value.
448 -- The purpose of this assignment is to provide a convenient auxiliary
449 -- data structure mechanism during operations which must traverse a
450 -- pattern (e.g. copy and finalization processing). Once constructed
451 -- patterns are strictly read only. This is necessary to allow sharing
452 -- of patterns between tasks. This means that we cannot go marking the
453 -- pattern (e.g. with a visited bit). Instead we cosntuct a separate
454 -- vector that contains the necessary information indexed by the Index
455 -- values in the pattern elements. For this purpose the only requirement
456 -- is that they be uniquely assigned.
458 -- Second, the pattern element referenced directly, i.e. the leading
459 -- pattern element, is always the maximum numbered element and therefore
460 -- indicates the total number of elements in the pattern. More precisely,
461 -- the element referenced by the P field of a pattern value, or the
462 -- element returned by any of the internal pattern construction routines
463 -- in the body (that return a value of type PE_Ptr) always is this
466 -- The purpose of this requirement is to allow an immediate determination
467 -- of the number of pattern elements within a pattern. This is used to
468 -- properly size the vectors used to contain auxiliary information for
469 -- traversal as described above.
471 -- Third, as compound pattern structures are constructed, the way in which
472 -- constituent parts of the pattern are constructed is stylized. This is
473 -- an automatic consequence of the way that these compounjd structures
474 -- are constructed, and basically what we are doing is simply documenting
475 -- and specifying the natural result of the pattern construction. The
476 -- section describing compound pattern structures gives details of the
477 -- numbering of each compound pattern structure.
479 -- The purpose of specifying the stylized numbering structures for the
480 -- compound patterns is to help simplify the processing in the Image
481 -- function, since it eases the task of retrieving the original recursive
482 -- structure of the pattern from the flat graph structure of elements.
483 -- This use in the Image function is the only point at which the code
484 -- makes use of the stylized structures.
486 type Ref_Array is array (IndexT range <>) of PE_Ptr;
487 -- This type is used to build an array whose N'th entry references the
488 -- element in a pattern whose Index value is N. See Build_Ref_Array.
490 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
491 -- Given a pattern element which is the leading element of a pattern
492 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
493 -- Ref_Array so that its N'th entry references the element of the
494 -- referenced pattern whose Index value is N.
496 -------------------------------
497 -- Recursive Pattern Matches --
498 -------------------------------
500 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
501 -- causes a recursive pattern match. This cannot be handled by an actual
502 -- recursive call to the outer level Match routine, since this would not
503 -- allow for possible backtracking into the region matched by the inner
504 -- pattern. Indeed this is the classical clash between recursion and
505 -- backtracking, and a simple recursive stack structure does not suffice.
507 -- This section describes how this recursion and the possible associated
508 -- backtracking is handled. We still use a single stack, but we establish
509 -- the concept of nested regions on this stack, each of which has a stack
510 -- base value pointing to the deepest stack entry of the region. The base
511 -- value for the outer level is zero.
513 -- When a recursive match is established, two special stack entries are
514 -- made. The first entry is used to save the original node that starts
515 -- the recursive match. This is saved so that the successor field of
516 -- this node is accessible at the end of the match, but it is never
517 -- popped and executed.
519 -- The second entry corresponds to a standard new region action. A
520 -- PC_R_Remove node is stacked, whose cursor field is used to store
521 -- the outer stack base, and the stack base is reset to point to
522 -- this PC_R_Remove node. Then the recursive pattern is matched and
523 -- it can make history stack entries in the normal matter, so now
524 -- the stack looks like:
526 -- (stack entries made by outer level)
528 -- (Special entry, node is (+P) successor
529 -- cursor entry is not used)
531 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
532 -- saved base value for the enclosing region)
534 -- (stack entries made by inner level)
536 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
537 -- removes itself and the special entry immediately underneath it,
538 -- restores the stack base value for the enclosing region, and then
539 -- again signals failure to look for alternatives that were stacked
540 -- before the recursion was initiated.
542 -- Now we need to consider what happens if the inner pattern succeeds, as
543 -- signalled by accessing the special PC_EOP pattern primitive. First we
544 -- recognize the nested case by looking at the Base value. If this Base
545 -- value is Stack'First, then the entire match has succeeded, but if the
546 -- base value is greater than Stack'First, then we have successfully
547 -- matched an inner pattern, and processing continues at the outer level.
549 -- There are two cases. The simple case is when the inner pattern has made
550 -- no stack entries, as recognized by the fact that the current stack
551 -- pointer is equal to the current base value. In this case it is fine to
552 -- remove all trace of the recursion by restoring the outer base value and
553 -- using the special entry to find the appropriate successor node.
555 -- The more complex case arises when the inner match does make stack
556 -- entries. In this case, the PC_EOP processing stacks a special entry
557 -- whose cursor value saves the saved inner base value (the one that
558 -- references the corresponding PC_R_Remove value), and whose node
559 -- pointer references a PC_R_Restore node, so the stack looks like:
561 -- (stack entries made by outer level)
563 -- (Special entry, node is (+P) successor,
564 -- cursor entry is not used)
566 -- (PC_R_Remove entry, "cursor" value is (negative)
567 -- saved base value for the enclosing region)
569 -- (stack entries made by inner level)
571 -- (PC_Region_Replace entry, "cursor" value is (negative)
572 -- stack pointer value referencing the PC_R_Remove entry).
574 -- If the entire match succeeds, then these stack entries are, as usual,
575 -- ignored and abandoned. If on the other hand a subsequent failure
576 -- causes the PC_Region_Replace entry to be popped, it restores the
577 -- inner base value from its saved "cursor" value and then fails again.
578 -- Note that it is OK that the cursor is temporarily clobbered by this
579 -- pop, since the second failure will reestablish a proper cursor value.
581 ---------------------------------
582 -- Compound Pattern Structures --
583 ---------------------------------
585 -- This section discusses the compound structures used to represent
586 -- constructed patterns. It shows the graph structures of pattern
587 -- elements that are constructed, and in the case of patterns that
588 -- provide backtracking possibilities, describes how the history
589 -- stack is used to control the backtracking. Finally, it notes the
590 -- way in which the Index numbers are assigned to the structure.
592 -- In all diagrams, solid lines (built witth minus signs or vertical
593 -- bars, represent successor pointers (Pthen fields) with > or V used
594 -- to indicate the direction of the pointer. The initial node of the
595 -- structure is in the upper left of the diagram. A dotted line is an
596 -- alternative pointer from the element above it to the element below
597 -- it. See individual sections for details on how alternatives are used.
603 -- In the pattern structures listed in this section, a line that looks
604 -- lile ----> with nothing to the right indicates an end of pattern
605 -- (EOP) pointer that represents the end of the match.
607 -- When a pattern concatenation (L & R) occurs, the resulting structure
608 -- is obtained by finding all such EOP pointers in L, and replacing
609 -- them to point to R. This is the most important flattening that
610 -- occurs in constructing a pattern, and it means that the pattern
611 -- matching circuitry does not have to keep track of the structure
612 -- of a pattern with respect to concatenation, since the appropriate
613 -- succesor is always at hand.
615 -- Concatenation itself generates no additional possibilities for
616 -- backtracking, but the constituent patterns of the concatenated
617 -- structure will make stack entries as usual. The maximum amount
618 -- of stack required by the structure is thus simply the sum of the
619 -- maximums required by L and R.
621 -- The index numbering of a concatenation structure works by leaving
622 -- the numbering of the right hand pattern, R, unchanged and adjusting
623 -- the numbers in the left hand pattern, L up by the count of elements
624 -- in R. This ensures that the maximum numbered element is the leading
625 -- element as required (given that it was the leading element in L).
631 -- A pattern (L or R) constructs the structure:
634 -- | A |---->| L |---->
642 -- The A element here is a PC_Alt node, and the dotted line represents
643 -- the contents of the Alt field. When the PC_Alt element is matched,
644 -- it stacks a pointer to the leading element of R on the history stack
645 -- so that on subsequent failure, a match of R is attempted.
647 -- The A node is the higest numbered element in the pattern. The
648 -- original index numbers of R are unchanged, but the index numbers
649 -- of the L pattern are adjusted up by the count of elements in R.
651 -- Note that the difference between the index of the L leading element
652 -- the index of the R leading element (after building the alt structure)
653 -- indicates the number of nodes in L, and this is true even after the
654 -- structure is incorporated into some larger structure. For example,
655 -- if the A node has index 16, and L has index 15 and R has index
656 -- 5, then we know that L has 10 (15-5) elements in it.
658 -- Suppose that we now concatenate this structure to another pattern
659 -- with 9 elements in it. We will now have the A node with an index
660 -- of 25, L with an index of 24 and R with an index of 14. We still
661 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
662 -- consequently the successor of the alternation structure has an
663 -- index with a value less than 15. This is used in Image to figure
664 -- out the original recursive structure of a pattern.
666 -- To clarify the interaction of the alternation and concatenation
667 -- structures, here is a more complex example of the structure built
670 -- (V or W or X) (Y or Z)
672 -- where A,B,C,D,E are all single element patterns:
674 -- +---+ +---+ +---+ +---+
675 -- I A I---->I V I---+-->I A I---->I Y I---->
676 -- +---+ +---+ I +---+ +---+
679 -- +---+ +---+ I +---+
680 -- I A I---->I W I-->I I Z I---->
681 -- +---+ +---+ I +---+
685 -- I X I------------>+
688 -- The numbering of the nodes would be as follows:
690 -- +---+ +---+ +---+ +---+
691 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
692 -- +---+ +---+ I +---+ +---+
695 -- +---+ +---+ I +---+
696 -- I 6 I---->I 5 I-->I I 1 I---->
697 -- +---+ +---+ I +---+
701 -- I 4 I------------>+
704 -- Note: The above structure actually corresponds to
706 -- (A or (B or C)) (D or E)
710 -- ((A or B) or C) (D or E)
712 -- which is the more natural interpretation, but in fact alternation
713 -- is associative, and the construction of an alternative changes the
714 -- left grouped pattern to the right grouped pattern in any case, so
715 -- that the Image function produces a more natural looking output.
721 -- An Arb pattern builds the structure
732 -- The X node is a PC_Arb_X node, which matches null, and stacks a
733 -- pointer to Y node, which is the PC_Arb_Y node that matches one
734 -- extra character and restacks itself.
736 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1
738 -------------------------
739 -- Arbno (simple case) --
740 -------------------------
742 -- The simple form of Arbno can be used where the pattern always
743 -- matches at least one character if it succeeds, and it is known
744 -- not to make any history stack entries. In this case, Arbno (P)
745 -- can construct the following structure:
759 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
760 -- pattern P. If a subsequent failure causes P to be matched and
761 -- this match succeeds, then node A gets restacked to try another
762 -- instance if needed by a subsequent failure.
764 -- The node numbering of the constituent pattern P is not affected.
765 -- The S node has a node number of P.Index + 1.
767 --------------------------
768 -- Arbno (complex case) --
769 --------------------------
771 -- A call to Arbno (P), where P can match null (or at least is not
772 -- known to require a non-null string) and/or P requires pattern stack
773 -- entries, constructs the following structure:
775 -- +--------------------------+
783 -- +---+ +---+ +---+ |
784 -- | E |---->| P |---->| Y |--->+
787 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
788 -- E-P-X structure used to match one Arbno instance.
790 -- Here E is the PC_R_Enter node which matches null and creates two
791 -- stack entries. The first is a special entry whose node field is
792 -- not used at all, and whose cursor field has the initial cursor.
794 -- The second entry corresponds to a standard new region action. A
795 -- PC_R_Remove node is stacked, whose cursor field is used to store
796 -- the outer stack base, and the stack base is reset to point to
797 -- this PC_R_Remove node. Then the pattern P is matched, and it can
798 -- make history stack entries in the normal manner, so now the stack
801 -- (stack entries made before assign pattern)
803 -- (Special entry, node field not used,
804 -- used only to save initial cursor)
806 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
807 -- saved base value for the enclosing region)
809 -- (stack entries made by matching P)
811 -- If the match of P fails, then the PC_R_Remove entry is popped and
812 -- it removes both itself and the special entry underneath it,
813 -- restores the outer stack base, and signals failure.
815 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
816 -- the inner region. There are two possibilities. If matching P left
817 -- no stack entries, then all traces of the inner region can be removed.
818 -- If there are stack entries, then we push an PC_Region_Replace stack
819 -- entry whose "cursor" value is the inner stack base value, and then
820 -- restore the outer stack base value, so the stack looks like:
822 -- (stack entries made before assign pattern)
824 -- (Special entry, node field not used,
825 -- used only to save initial cursor)
827 -- (PC_R_Remove entry, "cursor" value is (negative)
828 -- saved base value for the enclosing region)
830 -- (stack entries made by matching P)
832 -- (PC_Region_Replace entry, "cursor" value is (negative)
833 -- stack pointer value referencing the PC_R_Remove entry).
835 -- Now that we have matched another instance of the Arbno pattern,
836 -- we need to move to the successor. There are two cases. If the
837 -- Arbno pattern matched null, then there is no point in seeking
838 -- alternatives, since we would just match a whole bunch of nulls.
839 -- In this case we look through the alternative node, and move
840 -- directly to its successor (i.e. the successor of the Arbno
841 -- pattern). If on the other hand a non-null string was matched,
842 -- we simply follow the successor to the alternative node, which
843 -- sets up for another possible match of the Arbno pattern.
845 -- As noted in the section on stack checking, the stack count (and
846 -- hence the stack check) for a pattern includes only one iteration
847 -- of the Arbno pattern. To make sure that multiple iterations do not
848 -- overflow the stack, the Arbno node saves the stack count required
849 -- by a single iteration, and the Concat function increments this to
850 -- include stack entries required by any successor. The PC_Arbno_Y
851 -- node uses this count to ensure that sufficient stack remains
852 -- before proceeding after matching each new instance.
854 -- The node numbering of the constituent pattern P is not affected.
855 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
856 -- the E node is N + 2, and the X node is N + 3.
858 ----------------------
859 -- Assign Immediate --
860 ----------------------
862 -- Immediate assignment (P * V) constructs the following structure
865 -- | E |---->| P |---->| A |---->
868 -- Here E is the PC_R_Enter node which matches null and creates two
869 -- stack entries. The first is a special entry whose node field is
870 -- not used at all, and whose cursor field has the initial cursor.
872 -- The second entry corresponds to a standard new region action. A
873 -- PC_R_Remove node is stacked, whose cursor field is used to store
874 -- the outer stack base, and the stack base is reset to point to
875 -- this PC_R_Remove node. Then the pattern P is matched, and it can
876 -- make history stack entries in the normal manner, so now the stack
879 -- (stack entries made before assign pattern)
881 -- (Special entry, node field not used,
882 -- used only to save initial cursor)
884 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
885 -- saved base value for the enclosing region)
887 -- (stack entries made by matching P)
889 -- If the match of P fails, then the PC_R_Remove entry is popped
890 -- and it removes both itself and the special entry underneath it,
891 -- restores the outer stack base, and signals failure.
893 -- If the match of P succeeds, then node A, which is the actual
894 -- PC_Assign_Imm node, executes the assignment (using the stack
895 -- base to locate the entry with the saved starting cursor value),
896 -- and the pops the inner region. There are two possibilities, if
897 -- matching P left no stack entries, then all traces of the inner
898 -- region can be removed. If there are stack entries, then we push
899 -- an PC_Region_Replace stack entry whose "cursor" value is the
900 -- inner stack base value, and then restore the outer stack base
901 -- value, so the stack looks like:
903 -- (stack entries made before assign pattern)
905 -- (Special entry, node field not used,
906 -- used only to save initial cursor)
908 -- (PC_R_Remove entry, "cursor" value is (negative)
909 -- saved base value for the enclosing region)
911 -- (stack entries made by matching P)
913 -- (PC_Region_Replace entry, "cursor" value is the (negative)
914 -- stack pointer value referencing the PC_R_Remove entry).
916 -- If a subsequent failure occurs, the PC_Region_Replace node restores
917 -- the inner stack base value and signals failure to explore rematches
920 -- The node numbering of the constituent pattern P is not affected.
921 -- Where N is the number of nodes in P, the A node is numbered N + 1,
922 -- and the E node is N + 2.
924 ---------------------
925 -- Assign On Match --
926 ---------------------
928 -- The assign on match (**) pattern is quite similar to the assign
929 -- immediate pattern, except that the actual assignment has to be
930 -- delayed. The following structure is constructed:
933 -- | E |---->| P |---->| A |---->
936 -- The operation of this pattern is identical to that described above
937 -- for deferred assignment, up to the point where P has been matched.
939 -- The A node, which is the PC_Assign_OnM node first pushes a
940 -- PC_Assign node onto the history stack. This node saves the ending
941 -- cursor and acts as a flag for the final assignment, as further
944 -- It then stores a pointer to itself in the special entry node field.
945 -- This was otherwise unused, and is now used to retrive the address
946 -- of the variable to be assigned at the end of the pattern.
948 -- After that the inner region is terminated in the usual manner,
949 -- by stacking a PC_R_Restore entry as described for the assign
950 -- immediate case. Note that the optimization of completely
951 -- removing the inner region does not happen in this case, since
952 -- we have at least one stack entry (the PC_Assign one we just made).
953 -- The stack now looks like:
955 -- (stack entries made before assign pattern)
957 -- (Special entry, node points to copy of
958 -- the PC_Assign_OnM node, and the
959 -- cursor field saves the initial cursor).
961 -- (PC_R_Remove entry, "cursor" value is (negative)
962 -- saved base value for the enclosing region)
964 -- (stack entries made by matching P)
966 -- (PC_Assign entry, saves final cursor)
968 -- (PC_Region_Replace entry, "cursor" value is (negative)
969 -- stack pointer value referencing the PC_R_Remove entry).
971 -- If a subsequent failure causes the PC_Assign node to execute it
972 -- simply removes itself and propagates the failure.
974 -- If the match succeeds, then the history stack is scanned for
975 -- PC_Assign nodes, and the assignments are executed (examination
976 -- of the above diagram will show that all the necessary data is
977 -- at hand for the assignment).
979 -- To optimize the common case where no assign-on-match operations
980 -- are present, a global flag Assign_OnM is maintained which is
981 -- initialize to False, and gets set True as part of the execution
982 -- of the PC_Assign_OnM node. The scan of the history stack for
983 -- PC_Assign entries is done only if this flag is set.
985 -- The node numbering of the constituent pattern P is not affected.
986 -- Where N is the number of nodes in P, the A node is numbered N + 1,
987 -- and the E node is N + 2.
993 -- Bal builds a single node:
999 -- The node B is the PC_Bal node which matches a parentheses balanced
1000 -- string, starting at the current cursor position. It then updates
1001 -- the cursor past this matched string, and stacks a pointer to itself
1002 -- with this updated cursor value on the history stack, to extend the
1003 -- matched string on a subequent failure.
1005 -- Since this is a single node it is numbered 1 (the reason we include
1006 -- it in the compound patterns section is that it backtracks).
1012 -- BreakX builds the structure
1015 -- | B |---->| A |---->
1023 -- Here the B node is the BreakX_xx node that performs a normal Break
1024 -- function. The A node is an alternative (PC_Alt) node that matches
1025 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1026 -- extends the match one character (to eat up the previously detected
1027 -- break character), and then rematches the break.
1029 -- The B node is numbered 3, the alternative node is 1, and the X
1036 -- Fence builds a single node:
1042 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1043 -- PC_Cancel element which will abort the match on a subsequent failure.
1045 -- Since this is a single element it is numbered 1 (the reason we
1046 -- include it in the compound patterns section is that it backtracks).
1048 --------------------
1049 -- Fence Function --
1050 --------------------
1052 -- A call to the Fence function builds the structure:
1054 -- +---+ +---+ +---+
1055 -- | E |---->| P |---->| X |---->
1056 -- +---+ +---+ +---+
1058 -- Here E is the PC_R_Enter node which matches null and creates two
1059 -- stack entries. The first is a special entry which is not used at
1060 -- all in the fence case (it is present merely for uniformity with
1061 -- other cases of region enter operations).
1063 -- The second entry corresponds to a standard new region action. A
1064 -- PC_R_Remove node is stacked, whose cursor field is used to store
1065 -- the outer stack base, and the stack base is reset to point to
1066 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1067 -- make history stack entries in the normal manner, so now the stack
1070 -- (stack entries made before fence pattern)
1072 -- (Special entry, not used at all)
1074 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1075 -- saved base value for the enclosing region)
1077 -- (stack entries made by matching P)
1079 -- If the match of P fails, then the PC_R_Remove entry is popped
1080 -- and it removes both itself and the special entry underneath it,
1081 -- restores the outer stack base, and signals failure.
1083 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1084 -- control. One might be tempted to think that at this point, the
1085 -- history stack entries made by matching P can just be removed since
1086 -- they certainly are not going to be used for rematching (that is
1087 -- whole point of Fence after all!) However, this is wrong, because
1088 -- it would result in the loss of possible assign-on-match entries
1089 -- for deferred pattern assignments.
1091 -- Instead what we do is to make a special entry whose node references
1092 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1093 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1094 -- pointer is restored, so the stack looks like:
1096 -- (stack entries made before assign pattern)
1098 -- (Special entry, not used at all)
1100 -- (PC_R_Remove entry, "cursor" value is (negative)
1101 -- saved base value for the enclosing region)
1103 -- (stack entries made by matching P)
1105 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1106 -- pointer value referencing the PC_R_Remove entry).
1108 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1109 -- the entire inner region, including all entries made by matching P,
1110 -- and alternatives prior to the Fence pattern are sought.
1112 -- The node numbering of the constituent pattern P is not affected.
1113 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1114 -- and the E node is N + 2.
1120 -- Succeed builds a single node:
1126 -- The node S is the PC_Succeed node which matches null, and stacks
1127 -- a pointer to itself on the history stack, so that a subsequent
1128 -- failure repeats the same match.
1130 -- Since this is a single node it is numbered 1 (the reason we include
1131 -- it in the compound patterns section is that it backtracks).
1133 ---------------------
1134 -- Write Immediate --
1135 ---------------------
1137 -- The structure built for a write immediate operation (P * F, where
1138 -- F is a file access value) is:
1140 -- +---+ +---+ +---+
1141 -- | E |---->| P |---->| W |---->
1142 -- +---+ +---+ +---+
1144 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1145 -- handling is identical to that described above for Assign Immediate,
1146 -- except that at the point where a successful match occurs, the matched
1147 -- substring is written to the referenced file.
1149 -- The node numbering of the constituent pattern P is not affected.
1150 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1151 -- and the E node is N + 2.
1153 --------------------
1154 -- Write On Match --
1155 --------------------
1157 -- The structure built for a write on match operation (P ** F, where
1158 -- F is a file access value) is:
1160 -- +---+ +---+ +---+
1161 -- | E |---->| P |---->| W |---->
1162 -- +---+ +---+ +---+
1164 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1165 -- handling is identical to that described above for Assign On Match,
1166 -- except that at the point where a successful match has completed,
1167 -- the matched substring is written to the referenced file.
1169 -- The node numbering of the constituent pattern P is not affected.
1170 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1171 -- and the E node is N + 2.
1172 -----------------------
1173 -- Constant Patterns --
1174 -----------------------
1176 -- The following pattern elements are referenced only from the pattern
1177 -- history stack. In each case the processing for the pattern element
1178 -- results in pattern match abort, or futher failure, so there is no
1179 -- need for a successor and no need for a node number
1181 CP_Assign : aliased PE := (PC_Assign, 0, N);
1182 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1183 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1184 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1185 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1187 -----------------------
1188 -- Local Subprograms --
1189 -----------------------
1191 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1192 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1193 -- Build pattern structure corresponding to the alternation of L, R.
1194 -- (i.e. try to match L, and if that fails, try to match R).
1196 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1197 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1198 -- match at least one character if it succeeds and to require no
1199 -- stack entries under all circumstances. The result returned is
1200 -- a simple Arbno structure as previously described.
1202 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1203 -- Given two single node pattern elements E and A, and a (possible
1204 -- complex) pattern P, construct the concatenation E-->P-->A and
1205 -- return a pointer to E. The concatenation does not affect the
1206 -- node numbering in P. A has a number one higher than the maximum
1207 -- number in P, and E has a number two higher than the maximum
1208 -- number in P (see for example the Assign_Immediate structure to
1209 -- understand a typical use of this function).
1211 function BreakX_Make (B : PE_Ptr) return Pattern;
1212 -- Given a pattern element for a Break patternx, returns the
1213 -- corresponding BreakX compound pattern structure.
1215 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1216 -- Creates a pattern eelement that represents a concatenation of the
1217 -- two given pattern elements (i.e. the pattern L followed by R).
1218 -- The result returned is always the same as L, but the pattern
1219 -- referenced by L is modified to have R as a successor. This
1220 -- procedure does not copy L or R, so if a copy is required, it
1221 -- is the responsibility of the caller. The Incr parameter is an
1222 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1223 -- in the left operand, it represents the additional stack space
1224 -- required by the right operand.
1226 function C_To_PE (C : PChar) return PE_Ptr;
1227 -- Given a character, constructs a pattern element that matches
1228 -- the single character.
1230 function Copy (P : PE_Ptr) return PE_Ptr;
1231 -- Creates a copy of the pattern element referenced by the given
1232 -- pattern element reference. This is a deep copy, which means that
1233 -- it follows the Next and Alt pointers.
1235 function Image (P : PE_Ptr) return String;
1236 -- Returns the image of the address of the referenced pattern element.
1237 -- This is equivalent to Image (To_Address (P));
1239 function Is_In (C : Character; Str : String) return Boolean;
1240 pragma Inline (Is_In);
1241 -- Determines if the character C is in string Str
1243 procedure Logic_Error;
1244 -- Called to raise Program_Error with an appropriate message if an
1245 -- internal logic error is detected.
1247 function Str_BF (A : Boolean_Func) return String;
1248 function Str_FP (A : File_Ptr) return String;
1249 function Str_NF (A : Natural_Func) return String;
1250 function Str_NP (A : Natural_Ptr) return String;
1251 function Str_PP (A : Pattern_Ptr) return String;
1252 function Str_VF (A : VString_Func) return String;
1253 function Str_VP (A : VString_Ptr) return String;
1254 -- These are debugging routines, which return a representation of the
1255 -- given access value (they are called only by Image and Dump)
1257 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1258 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1259 -- are made. In particular, Succ is unchanged, and no index numbers
1260 -- are modified. Note that Pat may not be equal to EOP on entry.
1262 function S_To_PE (Str : PString) return PE_Ptr;
1263 -- Given a string, constructs a pattern element that matches the string
1265 procedure Uninitialized_Pattern;
1266 pragma No_Return (Uninitialized_Pattern);
1267 -- Called to raise Program_Error with an appropriate error message if
1268 -- an uninitialized pattern is used in any pattern construction or
1269 -- pattern matching operation.
1275 Start : out Natural;
1276 Stop : out Natural);
1277 -- This is the common pattern match routine. It is passed a string and
1278 -- a pattern, and it indicates success or failure, and on success the
1279 -- section of the string matched. It does not perform any assignments
1280 -- to the subject string, so pattern replacement is for the caller.
1282 -- Subject The subject string. The lower bound is always one. In the
1283 -- Match procedures, it is fine to use strings whose lower bound
1284 -- is not one, but we perform a one time conversion before the
1285 -- call to XMatch, so that XMatch does not have to be bothered
1286 -- with strange lower bounds.
1288 -- Pat_P Points to initial pattern element of pattern to be matched
1290 -- Pat_S Maximum required stack entries for pattern to be matched
1292 -- Start If match is successful, starting index of matched section.
1293 -- This value is always non-zero. A value of zero is used to
1294 -- indicate a failed match.
1296 -- Stop If match is successful, ending index of matched section.
1297 -- This can be zero if we match the null string at the start,
1298 -- in which case Start is set to zero, and Stop to one. If the
1299 -- Match fails, then the contents of Stop is undefined.
1305 Start : out Natural;
1306 Stop : out Natural);
1307 -- Identical in all respects to XMatch, except that trace information is
1308 -- output on Standard_Ouput during execution of the match. This is the
1309 -- version that is called if the original Match call has Debug => True.
1315 function "&" (L : PString; R : Pattern) return Pattern is
1317 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1320 function "&" (L : Pattern; R : PString) return Pattern is
1322 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1325 function "&" (L : PChar; R : Pattern) return Pattern is
1327 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1330 function "&" (L : Pattern; R : PChar) return Pattern is
1332 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1335 function "&" (L : Pattern; R : Pattern) return Pattern is
1337 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1346 -- +---+ +---+ +---+
1347 -- | E |---->| P |---->| A |---->
1348 -- +---+ +---+ +---+
1350 -- The node numbering of the constituent pattern P is not affected.
1351 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1352 -- and the E node is N + 2.
1354 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1355 Pat : constant PE_Ptr := Copy (P.P);
1356 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1357 A : constant PE_Ptr :=
1358 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1360 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1363 function "*" (P : PString; Var : VString_Var) return Pattern is
1364 Pat : constant PE_Ptr := S_To_PE (P);
1365 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1366 A : constant PE_Ptr :=
1367 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1369 return (AFC with 3, Bracket (E, Pat, A));
1372 function "*" (P : PChar; Var : VString_Var) return Pattern is
1373 Pat : constant PE_Ptr := C_To_PE (P);
1374 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1375 A : constant PE_Ptr :=
1376 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1378 return (AFC with 3, Bracket (E, Pat, A));
1383 -- +---+ +---+ +---+
1384 -- | E |---->| P |---->| W |---->
1385 -- +---+ +---+ +---+
1387 -- The node numbering of the constituent pattern P is not affected.
1388 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1389 -- and the E node is N + 2.
1391 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1392 Pat : constant PE_Ptr := Copy (P.P);
1393 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1394 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1396 return (AFC with 3, Bracket (E, Pat, W));
1399 function "*" (P : PString; Fil : File_Access) return Pattern is
1400 Pat : constant PE_Ptr := S_To_PE (P);
1401 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1402 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1404 return (AFC with 3, Bracket (E, Pat, W));
1407 function "*" (P : PChar; Fil : File_Access) return Pattern is
1408 Pat : constant PE_Ptr := C_To_PE (P);
1409 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1410 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1412 return (AFC with 3, Bracket (E, Pat, W));
1421 -- +---+ +---+ +---+
1422 -- | E |---->| P |---->| A |---->
1423 -- +---+ +---+ +---+
1425 -- The node numbering of the constituent pattern P is not affected.
1426 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1427 -- and the E node is N + 2.
1429 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1430 Pat : constant PE_Ptr := Copy (P.P);
1431 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1432 A : constant PE_Ptr :=
1433 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1435 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1438 function "**" (P : PString; Var : VString_Var) return Pattern is
1439 Pat : constant PE_Ptr := S_To_PE (P);
1440 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1441 A : constant PE_Ptr :=
1442 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1444 return (AFC with 3, Bracket (E, Pat, A));
1447 function "**" (P : PChar; Var : VString_Var) return Pattern is
1448 Pat : constant PE_Ptr := C_To_PE (P);
1449 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1450 A : constant PE_Ptr :=
1451 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1453 return (AFC with 3, Bracket (E, Pat, A));
1458 -- +---+ +---+ +---+
1459 -- | E |---->| P |---->| W |---->
1460 -- +---+ +---+ +---+
1462 -- The node numbering of the constituent pattern P is not affected.
1463 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1464 -- and the E node is N + 2.
1466 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1467 Pat : constant PE_Ptr := Copy (P.P);
1468 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1469 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1471 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1474 function "**" (P : PString; Fil : File_Access) return Pattern is
1475 Pat : constant PE_Ptr := S_To_PE (P);
1476 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1477 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1479 return (AFC with 3, Bracket (E, Pat, W));
1482 function "**" (P : PChar; Fil : File_Access) return Pattern is
1483 Pat : constant PE_Ptr := C_To_PE (P);
1484 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1485 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1487 return (AFC with 3, Bracket (E, Pat, W));
1494 function "+" (Str : VString_Var) return Pattern is
1498 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1501 function "+" (Str : VString_Func) return Pattern is
1503 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1506 function "+" (P : Pattern_Var) return Pattern is
1510 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1513 function "+" (P : Boolean_Func) return Pattern is
1515 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1522 function "or" (L : PString; R : Pattern) return Pattern is
1524 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1527 function "or" (L : Pattern; R : PString) return Pattern is
1529 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1532 function "or" (L : PString; R : PString) return Pattern is
1534 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1537 function "or" (L : Pattern; R : Pattern) return Pattern is
1540 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1543 function "or" (L : PChar; R : Pattern) return Pattern is
1545 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1548 function "or" (L : Pattern; R : PChar) return Pattern is
1550 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1553 function "or" (L : PChar; R : PChar) return Pattern is
1555 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1558 function "or" (L : PString; R : PChar) return Pattern is
1560 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1563 function "or" (L : PChar; R : PString) return Pattern is
1565 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1572 -- No two patterns share the same pattern elements, so the adjust
1573 -- procedure for a Pattern assignment must do a deep copy of the
1574 -- pattern element structure.
1576 procedure Adjust (Object : in out Pattern) is
1578 Object.P := Copy (Object.P);
1585 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1587 -- If the left pattern is null, then we just add the alternation
1588 -- node with an index one greater than the right hand pattern.
1591 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1593 -- If the left pattern is non-null, then build a reference vector
1594 -- for its elements, and adjust their index values to acccomodate
1595 -- the right hand elements. Then add the alternation node.
1599 Refs : Ref_Array (1 .. L.Index);
1602 Build_Ref_Array (L, Refs);
1604 for J in Refs'Range loop
1605 Refs (J).Index := Refs (J).Index + R.Index;
1609 return new PE'(PC_Alt, L.Index + 1, L, R);
1617 function Any (Str : String) return Pattern is
1619 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1622 function Any (Str : VString) return Pattern is
1624 return Any (S (Str));
1627 function Any (Str : Character) return Pattern is
1629 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1632 function Any (Str : Character_Set) return Pattern is
1634 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1637 function Any (Str : not null access VString) return Pattern is
1639 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1642 function Any (Str : VString_Func) return Pattern is
1644 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1660 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1
1662 function Arb return Pattern is
1663 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1664 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1666 return (AFC with 1, X);
1673 function Arbno (P : PString) return Pattern is
1675 if P'Length = 0 then
1676 return (AFC with 0, EOP);
1678 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1682 function Arbno (P : PChar) return Pattern is
1684 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1687 function Arbno (P : Pattern) return Pattern is
1688 Pat : constant PE_Ptr := Copy (P.P);
1692 and then OK_For_Simple_Arbno (Pat.Pcode)
1694 return (AFC with 0, Arbno_Simple (Pat));
1697 -- This is the complex case, either the pattern makes stack entries
1698 -- or it is possible for the pattern to match the null string (more
1699 -- accurately, we don't know that this is not the case).
1701 -- +--------------------------+
1709 -- +---+ +---+ +---+ |
1710 -- | E |---->| P |---->| Y |--->+
1711 -- +---+ +---+ +---+
1713 -- The node numbering of the constituent pattern P is not affected.
1714 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1715 -- the E node is N + 2, and the X node is N + 3.
1718 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1719 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1720 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1721 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1724 X.Index := EPY.Index + 1;
1725 return (AFC with P.Stk + 3, X);
1742 -- | P |---------->+
1745 -- The node numbering of the constituent pattern P is not affected.
1746 -- The S node has a node number of P.Index + 1.
1748 -- Note that we know that P cannot be EOP, because a null pattern
1749 -- does not meet the requirements for simple Arbno.
1751 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1752 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1754 Set_Successor (P, S);
1762 function Bal return Pattern is
1764 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1771 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1780 Set_Successor (P, A);
1781 E.Index := P.Index + 2;
1782 A.Index := P.Index + 1;
1792 function Break (Str : String) return Pattern is
1794 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1797 function Break (Str : VString) return Pattern is
1799 return Break (S (Str));
1802 function Break (Str : Character) return Pattern is
1804 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1807 function Break (Str : Character_Set) return Pattern is
1809 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1812 function Break (Str : not null access VString) return Pattern is
1815 new PE'(PC_Break_VP, 1, EOP, Str.all'Unchecked_Access));
1818 function Break (Str : VString_Func) return Pattern is
1820 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1827 function BreakX (Str : String) return Pattern is
1829 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1832 function BreakX (Str : VString) return Pattern is
1834 return BreakX (S (Str));
1837 function BreakX (Str : Character) return Pattern is
1839 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1842 function BreakX (Str : Character_Set) return Pattern is
1844 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1847 function BreakX (Str : not null access VString) return Pattern is
1849 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1852 function BreakX (Str : VString_Func) return Pattern is
1854 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1862 -- | B |---->| A |---->
1870 -- The B node is numbered 3, the alternative node is 1, and the X
1873 function BreakX_Make (B : PE_Ptr) return Pattern is
1874 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1875 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1878 return (AFC with 2, B);
1881 ---------------------
1882 -- Build_Ref_Array --
1883 ---------------------
1885 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1887 procedure Record_PE (E : PE_Ptr);
1888 -- Record given pattern element if not already recorded in RA,
1889 -- and also record any referenced pattern elements recursively.
1895 procedure Record_PE (E : PE_Ptr) is
1897 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1899 if E = EOP or else RA (E.Index) /= null then
1900 Put_LineD (", nothing to do");
1904 Put_LineD (", recording" & IndexT'Image (E.Index));
1906 Record_PE (E.Pthen);
1908 if E.Pcode in PC_Has_Alt then
1914 -- Start of processing for Build_Ref_Array
1918 Put_LineD ("Entering Build_Ref_Array");
1921 end Build_Ref_Array;
1927 function C_To_PE (C : PChar) return PE_Ptr is
1929 return new PE'(PC_Char, 1, EOP, C);
1936 function Cancel return Pattern is
1938 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1945 -- Concat needs to traverse the left operand performing the following
1948 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1949 -- reset to point to the second operand.
1951 -- b) Any PC_Arbno_Y node has its stack count field incremented
1952 -- by the parameter Incr provided for this purpose.
1954 -- d) Num fields of all pattern elements in the left operand are
1955 -- adjusted to include the elements of the right operand.
1957 -- Note: we do not use Set_Successor in the processing for Concat, since
1958 -- there is no point in doing two traversals, we may as well do everything
1959 -- at the same time.
1961 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1971 Refs : Ref_Array (1 .. L.Index);
1972 -- We build a reference array for L whose N'th element points to
1973 -- the pattern element of L whose original Index value is N.
1978 Build_Ref_Array (L, Refs);
1980 for J in Refs'Range loop
1983 P.Index := P.Index + R.Index;
1985 if P.Pcode = PC_Arbno_Y then
1986 P.Nat := P.Nat + Incr;
1989 if P.Pthen = EOP then
1993 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2007 function Copy (P : PE_Ptr) return PE_Ptr is
2010 Uninitialized_Pattern;
2014 Refs : Ref_Array (1 .. P.Index);
2015 -- References to elements in P, indexed by Index field
2017 Copy : Ref_Array (1 .. P.Index);
2018 -- Holds copies of elements of P, indexed by Index field
2023 Build_Ref_Array (P, Refs);
2025 -- Now copy all nodes
2027 for J in Refs'Range loop
2028 Copy (J) := new PE'(Refs (J).all);
2031 -- Adjust all internal references
2033 for J in Copy'Range loop
2036 -- Adjust successor pointer to point to copy
2038 if E.Pthen /= EOP then
2039 E.Pthen := Copy (E.Pthen.Index);
2042 -- Adjust Alt pointer if there is one to point to copy
2044 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2045 E.Alt := Copy (E.Alt.Index);
2048 -- Copy referenced string
2050 if E.Pcode = PC_String then
2051 E.Str := new String'(E.Str.all);
2055 return Copy (P.Index);
2064 procedure Dump (P : Pattern) is
2066 subtype Count is Ada.Text_IO.Count;
2068 -- Used to keep track of column in dump output
2070 Refs : Ref_Array (1 .. P.P.Index);
2071 -- We build a reference array whose N'th element points to the
2072 -- pattern element whose Index value is N.
2074 Cols : Natural := 2;
2075 -- Number of columns used for pattern numbers, minimum is 2
2079 procedure Write_Node_Id (E : PE_Ptr);
2080 -- Writes out a string identifying the given pattern element
2086 procedure Write_Node_Id (E : PE_Ptr) is
2091 for J in 4 .. Cols loop
2097 Str : String (1 .. Cols);
2098 N : Natural := Natural (E.Index);
2103 for J in reverse Str'Range loop
2104 Str (J) := Character'Val (48 + N mod 10);
2113 -- Start of processing for Dump
2117 Put ("Pattern Dump Output (pattern at " &
2119 ", S = " & Natural'Image (P.Stk) & ')');
2124 while Col < Scol loop
2130 -- If uninitialized pattern, dump line and we are done
2133 Put_Line ("Uninitialized pattern value");
2137 -- If null pattern, just dump it and we are all done
2140 Put_Line ("EOP (null pattern)");
2144 Build_Ref_Array (P.P, Refs);
2146 -- Set number of columns required for node numbers
2148 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2152 -- Now dump the nodes in reverse sequence. We output them in reverse
2153 -- sequence since this corresponds to the natural order used to
2154 -- construct the patterns.
2156 for J in reverse Refs'Range loop
2159 Set_Col (Count (Cols) + 4);
2162 Put (Pattern_Code'Image (E.Pcode));
2164 Set_Col (21 + Count (Cols) + Address_Image_Length);
2165 Write_Node_Id (E.Pthen);
2166 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2174 Write_Node_Id (E.Alt);
2177 Put (Str_PP (E.PP));
2179 when PC_Pred_Func =>
2180 Put (Str_BF (E.BF));
2182 when PC_Assign_Imm |
2191 Put (Str_VP (E.VP));
2195 Put (Str_FP (E.FP));
2198 Put (Image (E.Str.all));
2201 Put (Image (E.Str2));
2204 Put (Image (E.Str3));
2207 Put (Image (E.Str4));
2210 Put (Image (E.Str5));
2213 Put (Image (E.Str6));
2216 Put (Str_NP (E.Var));
2225 Put (''' & E.Char & ''');
2233 Put ('"' & To_Sequence (E.CS) & '"');
2248 Put (Str_NF (E.NF));
2255 Put (Str_NP (E.NP));
2264 Put (Str_VF (E.VF));
2266 when others => null;
2280 function Fail return Pattern is
2282 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2291 function Fence return Pattern is
2293 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2298 -- +---+ +---+ +---+
2299 -- | E |---->| P |---->| X |---->
2300 -- +---+ +---+ +---+
2302 -- The node numbering of the constituent pattern P is not affected.
2303 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2304 -- and the E node is N + 2.
2306 function Fence (P : Pattern) return Pattern is
2307 Pat : constant PE_Ptr := Copy (P.P);
2308 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2309 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2311 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2318 procedure Finalize (Object : in out Pattern) is
2320 procedure Free is new Ada.Unchecked_Deallocation (PE, PE_Ptr);
2321 procedure Free is new Ada.Unchecked_Deallocation (String, String_Ptr);
2324 -- Nothing to do if already freed
2326 if Object.P = null then
2329 -- Otherwise we must free all elements
2333 Refs : Ref_Array (1 .. Object.P.Index);
2334 -- References to elements in pattern to be finalized
2337 Build_Ref_Array (Object.P, Refs);
2339 for J in Refs'Range loop
2340 if Refs (J).Pcode = PC_String then
2341 Free (Refs (J).Str);
2356 function Image (P : PE_Ptr) return String is
2358 return Image (To_Address (P));
2361 function Image (P : Pattern) return String is
2363 return S (Image (P));
2366 function Image (P : Pattern) return VString is
2368 Kill_Ampersand : Boolean := False;
2369 -- Set True to delete next & to be output to Result
2371 Result : VString := Nul;
2372 -- The result is accumulated here, using Append
2374 Refs : Ref_Array (1 .. P.P.Index);
2375 -- We build a reference array whose N'th element points to the
2376 -- pattern element whose Index value is N.
2378 procedure Delete_Ampersand;
2379 -- Deletes the ampersand at the end of Result
2381 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2382 -- E refers to a pattern structure whose successor is given by Succ.
2383 -- This procedure appends to Result a representation of this pattern.
2384 -- The Paren parameter indicates whether parentheses are required if
2385 -- the output is more than one element.
2387 procedure Image_One (E : in out PE_Ptr);
2388 -- E refers to a pattern structure. This procedure appends to Result
2389 -- a representation of the single simple or compound pattern structure
2390 -- at the start of E and updates E to point to its successor.
2392 ----------------------
2393 -- Delete_Ampersand --
2394 ----------------------
2396 procedure Delete_Ampersand is
2397 L : constant Natural := Length (Result);
2400 Delete (Result, L - 1, L);
2402 end Delete_Ampersand;
2408 procedure Image_One (E : in out PE_Ptr) is
2410 ER : PE_Ptr := E.Pthen;
2411 -- Successor set as result in E unless reset
2417 Append (Result, "Cancel");
2419 when PC_Alt => Alt : declare
2421 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2422 -- Number of elements in left pattern of alternation
2424 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2425 -- Number of lowest index in elements of left pattern
2430 -- The successor of the alternation node must have a lower
2431 -- index than any node that is in the left pattern or a
2432 -- higher index than the alternation node itself.
2435 and then ER.Index >= Lowest_In_L
2436 and then ER.Index < E.Index
2441 Append (Result, '(');
2445 Image_Seq (E1.Pthen, ER, False);
2446 Append (Result, " or ");
2448 exit when E1.Pcode /= PC_Alt;
2451 Image_Seq (E1, ER, False);
2452 Append (Result, ')');
2456 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2459 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2462 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2465 Append (Result, "Arb");
2468 Append (Result, "Arbno (");
2469 Image_Seq (E.Alt, E, False);
2470 Append (Result, ')');
2473 Append (Result, "Arbno (");
2474 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2475 Append (Result, ')');
2477 when PC_Assign_Imm =>
2479 Append (Result, "* " & Str_VP (Refs (E.Index).VP));
2481 when PC_Assign_OnM =>
2483 Append (Result, "** " & Str_VP (Refs (E.Index).VP));
2486 Append (Result, "Any ('" & E.Char & "')");
2489 Append (Result, "Bal");
2492 Append (Result, "Break ('" & E.Char & "')");
2495 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2498 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2501 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2503 when PC_BreakX_CH =>
2504 Append (Result, "BreakX ('" & E.Char & "')");
2507 when PC_BreakX_CS =>
2508 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2511 when PC_BreakX_VF =>
2512 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2515 when PC_BreakX_VP =>
2516 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2520 Append (Result, ''' & E.Char & ''');
2523 Append (Result, "Fail");
2526 Append (Result, "Fence");
2529 Append (Result, "Fence (");
2530 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2531 Append (Result, ")");
2532 ER := Refs (E.Index - 1).Pthen;
2535 Append (Result, "Len (" & E.Nat & ')');
2538 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2541 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2543 when PC_NotAny_CH =>
2544 Append (Result, "NotAny ('" & E.Char & "')");
2546 when PC_NotAny_CS =>
2547 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2549 when PC_NotAny_VF =>
2550 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2552 when PC_NotAny_VP =>
2553 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2556 Append (Result, "NSpan ('" & E.Char & "')");
2559 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2562 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2565 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2568 Append (Result, """""");
2571 Append (Result, "Pos (" & E.Nat & ')');
2574 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2577 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2580 Kill_Ampersand := True;
2583 Append (Result, "Rest");
2586 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2588 when PC_Pred_Func =>
2589 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2592 Append (Result, "RPos (" & E.Nat & ')');
2595 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2598 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2601 Append (Result, "RTab (" & E.Nat & ')');
2604 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2607 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2610 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2613 Append (Result, "Span ('" & E.Char & "')");
2616 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2619 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2622 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2625 Append (Result, Image (E.Str.all));
2628 Append (Result, Image (E.Str2));
2631 Append (Result, Image (E.Str3));
2634 Append (Result, Image (E.Str4));
2637 Append (Result, Image (E.Str5));
2640 Append (Result, Image (E.Str6));
2642 when PC_String_VF =>
2643 Append (Result, "(+" & Str_VF (E.VF) & ')');
2645 when PC_String_VP =>
2646 Append (Result, "(+" & Str_VP (E.VP) & ')');
2649 Append (Result, "Succeed");
2652 Append (Result, "Tab (" & E.Nat & ')');
2655 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2658 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2660 when PC_Write_Imm =>
2661 Append (Result, '(');
2662 Image_Seq (E, Refs (E.Index - 1), True);
2663 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2664 ER := Refs (E.Index - 1).Pthen;
2666 when PC_Write_OnM =>
2667 Append (Result, '(');
2668 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2669 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2670 ER := Refs (E.Index - 1).Pthen;
2672 -- Other pattern codes should not appear as leading elements
2683 Append (Result, "???");
2694 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2695 Indx : constant Natural := Length (Result);
2697 Mult : Boolean := False;
2700 -- The image of EOP is "" (the null string)
2703 Append (Result, """""");
2705 -- Else generate appropriate concatenation sequence
2710 exit when E1 = Succ;
2714 if Kill_Ampersand then
2715 Kill_Ampersand := False;
2717 Append (Result, " & ");
2722 if Mult and Paren then
2723 Insert (Result, Indx + 1, "(");
2724 Append (Result, ")");
2728 -- Start of processing for Image
2731 Build_Ref_Array (P.P, Refs);
2732 Image_Seq (P.P, EOP, False);
2740 function Is_In (C : Character; Str : String) return Boolean is
2742 for J in Str'Range loop
2755 function Len (Count : Natural) return Pattern is
2757 -- Note, the following is not just an optimization, it is needed
2758 -- to ensure that Arbno (Len (0)) does not generate an infinite
2759 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2762 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2765 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2769 function Len (Count : Natural_Func) return Pattern is
2771 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2774 function Len (Count : not null access Natural) return Pattern is
2776 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2783 procedure Logic_Error is
2786 (Program_Error'Identity,
2787 "Internal logic error in GNAT.Spitbol.Patterns");
2796 Pat : Pattern) return Boolean
2803 pragma Unreferenced (Stop);
2806 Get_String (Subject, S, L);
2809 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2811 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2819 Pat : Pattern) return Boolean
2821 Start, Stop : Natural;
2822 pragma Unreferenced (Stop);
2824 subtype String1 is String (1 .. Subject'Length);
2828 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2830 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2837 (Subject : VString_Var;
2839 Replace : VString) return Boolean
2847 Get_String (Subject, S, L);
2850 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2852 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2858 Get_String (Replace, S, L);
2860 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L));
2866 (Subject : VString_Var;
2868 Replace : String) return Boolean
2876 Get_String (Subject, S, L);
2879 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2881 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2888 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2902 pragma Unreferenced (Start, Stop);
2905 Get_String (Subject, S, L);
2908 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2910 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2918 Start, Stop : Natural;
2919 pragma Unreferenced (Start, Stop);
2921 subtype String1 is String (1 .. Subject'Length);
2925 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2927 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2932 (Subject : in out VString;
2942 Get_String (Subject, S, L);
2945 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2947 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2951 Get_String (Replace, S, L);
2952 Replace_Slice (Subject, Start, Stop, S (1 .. L));
2957 (Subject : in out VString;
2967 Get_String (Subject, S, L);
2970 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2972 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2976 Replace_Slice (Subject, Start, Stop, Replace);
2982 Pat : PString) return Boolean
2984 Pat_Len : constant Natural := Pat'Length;
2989 Get_String (Subject, S, L);
2991 if Anchored_Mode then
2995 return Pat = S (1 .. Pat_Len);
2999 for J in 1 .. L - Pat_Len + 1 loop
3000 if Pat = S (J .. J + (Pat_Len - 1)) then
3011 Pat : PString) return Boolean
3013 Pat_Len : constant Natural := Pat'Length;
3014 Sub_Len : constant Natural := Subject'Length;
3015 SFirst : constant Natural := Subject'First;
3018 if Anchored_Mode then
3019 if Pat_Len > Sub_Len then
3022 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
3026 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3027 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3037 (Subject : VString_Var;
3039 Replace : VString) return Boolean
3047 Get_String (Subject, S, L);
3050 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3052 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3058 Get_String (Replace, S, L);
3060 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L));
3066 (Subject : VString_Var;
3068 Replace : String) return Boolean
3076 Get_String (Subject, S, L);
3079 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3081 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3088 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3102 pragma Unreferenced (Start, Stop);
3105 Get_String (Subject, S, L);
3108 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3110 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3118 Start, Stop : Natural;
3119 pragma Unreferenced (Start, Stop);
3121 subtype String1 is String (1 .. Subject'Length);
3125 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3127 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3132 (Subject : in out VString;
3142 Get_String (Subject, S, L);
3145 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3147 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3151 Get_String (Replace, S, L);
3152 Replace_Slice (Subject, Start, Stop, S (1 .. L));
3157 (Subject : in out VString;
3167 Get_String (Subject, S, L);
3170 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3172 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3176 Replace_Slice (Subject, Start, Stop, Replace);
3181 (Subject : VString_Var;
3183 Result : Match_Result_Var) return Boolean
3191 Get_String (Subject, S, L);
3194 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3196 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3200 Result'Unrestricted_Access.all.Var := null;
3204 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3205 Result'Unrestricted_Access.all.Start := Start;
3206 Result'Unrestricted_Access.all.Stop := Stop;
3212 (Subject : in out VString;
3214 Result : out Match_Result)
3222 Get_String (Subject, S, L);
3225 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3227 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3233 Result.Var := Subject'Unrestricted_Access;
3234 Result.Start := Start;
3235 Result.Stop := Stop;
3243 procedure New_LineD is
3245 if Internal_Debug then
3254 function NotAny (Str : String) return Pattern is
3256 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3259 function NotAny (Str : VString) return Pattern is
3261 return NotAny (S (Str));
3264 function NotAny (Str : Character) return Pattern is
3266 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3269 function NotAny (Str : Character_Set) return Pattern is
3271 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3274 function NotAny (Str : not null access VString) return Pattern is
3276 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3279 function NotAny (Str : VString_Func) return Pattern is
3281 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3288 function NSpan (Str : String) return Pattern is
3290 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3293 function NSpan (Str : VString) return Pattern is
3295 return NSpan (S (Str));
3298 function NSpan (Str : Character) return Pattern is
3300 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3303 function NSpan (Str : Character_Set) return Pattern is
3305 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3308 function NSpan (Str : not null access VString) return Pattern is
3310 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3313 function NSpan (Str : VString_Func) return Pattern is
3315 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3322 function Pos (Count : Natural) return Pattern is
3324 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3327 function Pos (Count : Natural_Func) return Pattern is
3329 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3332 function Pos (Count : not null access Natural) return Pattern is
3334 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3341 procedure PutD (Str : String) is
3343 if Internal_Debug then
3352 procedure Put_LineD (Str : String) is
3354 if Internal_Debug then
3364 (Result : in out Match_Result;
3371 Get_String (Replace, S, L);
3373 if Result.Var /= null then
3374 Replace_Slice (Result.Var.all, Result.Start, Result.Stop, S (1 .. L));
3383 function Rest return Pattern is
3385 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3392 function Rpos (Count : Natural) return Pattern is
3394 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3397 function Rpos (Count : Natural_Func) return Pattern is
3399 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3402 function Rpos (Count : not null access Natural) return Pattern is
3404 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3411 function Rtab (Count : Natural) return Pattern is
3413 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3416 function Rtab (Count : Natural_Func) return Pattern is
3418 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3421 function Rtab (Count : not null access Natural) return Pattern is
3423 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3430 function S_To_PE (Str : PString) return PE_Ptr is
3431 Len : constant Natural := Str'Length;
3436 return new PE'(PC_Null, 1, EOP);
3439 return new PE'(PC_Char, 1, EOP, Str (Str'First));
3442 return new PE'(PC_String_2, 1, EOP, Str);
3445 return new PE'(PC_String_3, 1, EOP, Str);
3448 return new PE'(PC_String_4, 1, EOP, Str);
3451 return new PE'(PC_String_5, 1, EOP, Str);
3454 return new PE'(PC_String_6, 1, EOP, Str);
3457 return new PE'(PC_String, 1, EOP, new String'(Str));
3466 -- Note: this procedure is not used by the normal concatenation circuit,
3467 -- since other fixups are required on the left operand in this case, and
3468 -- they might as well be done all together.
3470 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3473 Uninitialized_Pattern;
3475 elsif Pat = EOP then
3480 Refs : Ref_Array (1 .. Pat.Index);
3481 -- We build a reference array for L whose N'th element points to
3482 -- the pattern element of L whose original Index value is N.
3487 Build_Ref_Array (Pat, Refs);
3489 for J in Refs'Range loop
3492 if P.Pthen = EOP then
3496 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3508 function Setcur (Var : not null access Natural) return Pattern is
3510 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3517 function Span (Str : String) return Pattern is
3519 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3522 function Span (Str : VString) return Pattern is
3524 return Span (S (Str));
3527 function Span (Str : Character) return Pattern is
3529 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3532 function Span (Str : Character_Set) return Pattern is
3534 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3537 function Span (Str : not null access VString) return Pattern is
3539 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3542 function Span (Str : VString_Func) return Pattern is
3544 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3551 function Str_BF (A : Boolean_Func) return String is
3552 function To_A is new Ada.Unchecked_Conversion (Boolean_Func, Address);
3554 return "BF(" & Image (To_A (A)) & ')';
3561 function Str_FP (A : File_Ptr) return String is
3563 return "FP(" & Image (A.all'Address) & ')';
3570 function Str_NF (A : Natural_Func) return String is
3571 function To_A is new Ada.Unchecked_Conversion (Natural_Func, Address);
3573 return "NF(" & Image (To_A (A)) & ')';
3580 function Str_NP (A : Natural_Ptr) return String is
3582 return "NP(" & Image (A.all'Address) & ')';
3589 function Str_PP (A : Pattern_Ptr) return String is
3591 return "PP(" & Image (A.all'Address) & ')';
3598 function Str_VF (A : VString_Func) return String is
3599 function To_A is new Ada.Unchecked_Conversion (VString_Func, Address);
3601 return "VF(" & Image (To_A (A)) & ')';
3608 function Str_VP (A : VString_Ptr) return String is
3610 return "VP(" & Image (A.all'Address) & ')';
3617 function Succeed return Pattern is
3619 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3626 function Tab (Count : Natural) return Pattern is
3628 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3631 function Tab (Count : Natural_Func) return Pattern is
3633 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3636 function Tab (Count : not null access Natural) return Pattern is
3638 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3641 ---------------------------
3642 -- Uninitialized_Pattern --
3643 ---------------------------
3645 procedure Uninitialized_Pattern is
3648 (Program_Error'Identity,
3649 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern");
3650 end Uninitialized_Pattern;
3660 Start : out Natural;
3664 -- Pointer to current pattern node. Initialized from Pat_P, and then
3665 -- updated as the match proceeds through its constituent elements.
3667 Length : constant Natural := Subject'Length;
3668 -- Length of string (= Subject'Last, since Subject'First is always 1)
3670 Cursor : Integer := 0;
3671 -- If the value is non-negative, then this value is the index showing
3672 -- the current position of the match in the subject string. The next
3673 -- character to be matched is at Subject (Cursor + 1). Note that since
3674 -- our view of the subject string in XMatch always has a lower bound
3675 -- of one, regardless of original bounds, that this definition exactly
3676 -- corresponds to the cursor value as referenced by functions like Pos.
3678 -- If the value is negative, then this is a saved stack pointer,
3679 -- typically a base pointer of an inner or outer region. Cursor
3680 -- temporarily holds such a value when it is popped from the stack
3681 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3682 -- cursor value before the match proceeds (e.g. by propagating the
3683 -- failure and popping a "real" cursor value from the stack.
3685 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3686 -- Dummy pattern element used in the unanchored case
3689 -- The pattern matching failure stack for this call to Match
3691 Stack_Ptr : Stack_Range;
3692 -- Current stack pointer. This points to the top element of the stack
3693 -- that is currently in use. At the outer level this is the special
3694 -- entry placed on the stack according to the anchor mode.
3696 Stack_Init : constant Stack_Range := Stack'First + 1;
3697 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3698 -- initial (Stack'First) element of the stack is not used so that
3699 -- when we pop the last element off, Stack_Ptr is still in range.
3701 Stack_Base : Stack_Range;
3702 -- This value is the stack base value, i.e. the stack pointer for the
3703 -- first history stack entry in the current stack region. See separate
3704 -- section on handling of recursive pattern matches.
3706 Assign_OnM : Boolean := False;
3707 -- Set True if assign-on-match or write-on-match operations may be
3708 -- present in the history stack, which must then be scanned on a
3709 -- successful match.
3711 procedure Pop_Region;
3712 pragma Inline (Pop_Region);
3713 -- Used at the end of processing of an inner region. if the inner
3714 -- region left no stack entries, then all trace of it is removed.
3715 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3716 -- handling of alternatives in the inner region.
3718 procedure Push (Node : PE_Ptr);
3719 pragma Inline (Push);
3720 -- Make entry in pattern matching stack with current cursor valeu
3722 procedure Push_Region;
3723 pragma Inline (Push_Region);
3724 -- This procedure makes a new region on the history stack. The
3725 -- caller first establishes the special entry on the stack, but
3726 -- does not push the stack pointer. Then this call stacks a
3727 -- PC_Remove_Region node, on top of this entry, using the cursor
3728 -- field of the PC_Remove_Region entry to save the outer level
3729 -- stack base value, and resets the stack base to point to this
3730 -- PC_Remove_Region node.
3736 procedure Pop_Region is
3738 -- If nothing was pushed in the inner region, we can just get
3739 -- rid of it entirely, leaving no traces that it was ever there
3741 if Stack_Ptr = Stack_Base then
3742 Stack_Ptr := Stack_Base - 2;
3743 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3745 -- If stuff was pushed in the inner region, then we have to
3746 -- push a PC_R_Restore node so that we properly handle possible
3747 -- rematches within the region.
3750 Stack_Ptr := Stack_Ptr + 1;
3751 Stack (Stack_Ptr).Cursor := Stack_Base;
3752 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3753 Stack_Base := Stack (Stack_Base).Cursor;
3761 procedure Push (Node : PE_Ptr) is
3763 Stack_Ptr := Stack_Ptr + 1;
3764 Stack (Stack_Ptr).Cursor := Cursor;
3765 Stack (Stack_Ptr).Node := Node;
3772 procedure Push_Region is
3774 Stack_Ptr := Stack_Ptr + 2;
3775 Stack (Stack_Ptr).Cursor := Stack_Base;
3776 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3777 Stack_Base := Stack_Ptr;
3780 -- Start of processing for XMatch
3783 if Pat_P = null then
3784 Uninitialized_Pattern;
3787 -- Check we have enough stack for this pattern. This check deals with
3788 -- every possibility except a match of a recursive pattern, where we
3789 -- make a check at each recursion level.
3791 if Pat_S >= Stack_Size - 1 then
3792 raise Pattern_Stack_Overflow;
3795 -- In anchored mode, the bottom entry on the stack is an abort entry
3797 if Anchored_Mode then
3798 Stack (Stack_Init).Node := CP_Cancel'Access;
3799 Stack (Stack_Init).Cursor := 0;
3801 -- In unanchored more, the bottom entry on the stack references
3802 -- the special pattern element PE_Unanchored, whose Pthen field
3803 -- points to the initial pattern element. The cursor value in this
3804 -- entry is the number of anchor moves so far.
3807 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3808 Stack (Stack_Init).Cursor := 0;
3811 Stack_Ptr := Stack_Init;
3812 Stack_Base := Stack_Ptr;
3817 -----------------------------------------
3818 -- Main Pattern Matching State Control --
3819 -----------------------------------------
3821 -- This is a state machine which uses gotos to change state. The
3822 -- initial state is Match, to initiate the matching of the first
3823 -- element, so the goto Match above starts the match. In the
3824 -- following descriptions, we indicate the global values that
3825 -- are relevant for the state transition.
3827 -- Come here if entire match fails
3834 -- Come here if entire match succeeds
3836 -- Cursor current position in subject string
3839 Start := Stack (Stack_Init).Cursor + 1;
3842 -- Scan history stack for deferred assignments or writes
3845 for S in Stack_Init .. Stack_Ptr loop
3846 if Stack (S).Node = CP_Assign'Access then
3848 Inner_Base : constant Stack_Range :=
3849 Stack (S + 1).Cursor;
3850 Special_Entry : constant Stack_Range :=
3852 Node_OnM : constant PE_Ptr :=
3853 Stack (Special_Entry).Node;
3854 Start : constant Natural :=
3855 Stack (Special_Entry).Cursor + 1;
3856 Stop : constant Natural := Stack (S).Cursor;
3859 if Node_OnM.Pcode = PC_Assign_OnM then
3860 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3862 elsif Node_OnM.Pcode = PC_Write_OnM then
3863 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3875 -- Come here if attempt to match current element fails
3877 -- Stack_Base current stack base
3878 -- Stack_Ptr current stack pointer
3881 Cursor := Stack (Stack_Ptr).Cursor;
3882 Node := Stack (Stack_Ptr).Node;
3883 Stack_Ptr := Stack_Ptr - 1;
3886 -- Come here if attempt to match current element succeeds
3888 -- Cursor current position in subject string
3889 -- Node pointer to node successfully matched
3890 -- Stack_Base current stack base
3891 -- Stack_Ptr current stack pointer
3896 -- Come here to match the next pattern element
3898 -- Cursor current position in subject string
3899 -- Node pointer to node to be matched
3900 -- Stack_Base current stack base
3901 -- Stack_Ptr current stack pointer
3905 --------------------------------------------------
3906 -- Main Pattern Match Element Matching Routines --
3907 --------------------------------------------------
3909 -- Here is the case statement that processes the current node. The
3910 -- processing for each element does one of five things:
3912 -- goto Succeed to move to the successor
3913 -- goto Match_Succeed if the entire match succeeds
3914 -- goto Match_Fail if the entire match fails
3915 -- goto Fail to signal failure of current match
3917 -- Processing is NOT allowed to fall through
3933 -- Any (one character case)
3937 and then Subject (Cursor + 1) = Node.Char
3939 Cursor := Cursor + 1;
3945 -- Any (character set case)
3949 and then Is_In (Subject (Cursor + 1), Node.CS)
3951 Cursor := Cursor + 1;
3957 -- Any (string function case)
3959 when PC_Any_VF => declare
3960 U : constant VString := Node.VF.all;
3965 Get_String (U, S, L);
3968 and then Is_In (Subject (Cursor + 1), S (1 .. L))
3970 Cursor := Cursor + 1;
3977 -- Any (string pointer case)
3979 when PC_Any_VP => declare
3980 U : constant VString := Node.VP.all;
3985 Get_String (U, S, L);
3988 and then Is_In (Subject (Cursor + 1), S (1 .. L))
3990 Cursor := Cursor + 1;
3997 -- Arb (initial match)
4007 if Cursor < Length then
4008 Cursor := Cursor + 1;
4015 -- Arbno_S (simple Arbno initialize). This is the node that
4016 -- initiates the match of a simple Arbno structure.
4023 -- Arbno_X (Arbno initialize). This is the node that initiates
4024 -- the match of a complex Arbno structure.
4031 -- Arbno_Y (Arbno rematch). This is the node that is executed
4032 -- following successful matching of one instance of a complex
4035 when PC_Arbno_Y => declare
4036 Null_Match : constant Boolean :=
4037 Cursor = Stack (Stack_Base - 1).Cursor;
4042 -- If arbno extension matched null, then immediately fail
4048 -- Here we must do a stack check to make sure enough stack
4049 -- is left. This check will happen once for each instance of
4050 -- the Arbno pattern that is matched. The Nat field of a
4051 -- PC_Arbno pattern contains the maximum stack entries needed
4052 -- for the Arbno with one instance and the successor pattern
4054 if Stack_Ptr + Node.Nat >= Stack'Last then
4055 raise Pattern_Stack_Overflow;
4061 -- Assign. If this node is executed, it means the assign-on-match
4062 -- or write-on-match operation will not happen after all, so we
4063 -- is propagate the failure, removing the PC_Assign node.
4068 -- Assign immediate. This node performs the actual assignment
4070 when PC_Assign_Imm =>
4073 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4077 -- Assign on match. This node sets up for the eventual assignment
4079 when PC_Assign_OnM =>
4080 Stack (Stack_Base - 1).Node := Node;
4081 Push (CP_Assign'Access);
4089 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4092 elsif Subject (Cursor + 1) = '(' then
4094 Paren_Count : Natural := 1;
4098 Cursor := Cursor + 1;
4100 if Cursor >= Length then
4103 elsif Subject (Cursor + 1) = '(' then
4104 Paren_Count := Paren_Count + 1;
4106 elsif Subject (Cursor + 1) = ')' then
4107 Paren_Count := Paren_Count - 1;
4108 exit when Paren_Count = 0;
4114 Cursor := Cursor + 1;
4118 -- Break (one character case)
4121 while Cursor < Length loop
4122 if Subject (Cursor + 1) = Node.Char then
4125 Cursor := Cursor + 1;
4131 -- Break (character set case)
4134 while Cursor < Length loop
4135 if Is_In (Subject (Cursor + 1), Node.CS) then
4138 Cursor := Cursor + 1;
4144 -- Break (string function case)
4146 when PC_Break_VF => declare
4147 U : constant VString := Node.VF.all;
4152 Get_String (U, S, L);
4154 while Cursor < Length loop
4155 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4158 Cursor := Cursor + 1;
4165 -- Break (string pointer case)
4167 when PC_Break_VP => declare
4168 U : constant VString := Node.VP.all;
4173 Get_String (U, S, L);
4175 while Cursor < Length loop
4176 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4179 Cursor := Cursor + 1;
4186 -- BreakX (one character case)
4188 when PC_BreakX_CH =>
4189 while Cursor < Length loop
4190 if Subject (Cursor + 1) = Node.Char then
4193 Cursor := Cursor + 1;
4199 -- BreakX (character set case)
4201 when PC_BreakX_CS =>
4202 while Cursor < Length loop
4203 if Is_In (Subject (Cursor + 1), Node.CS) then
4206 Cursor := Cursor + 1;
4212 -- BreakX (string function case)
4214 when PC_BreakX_VF => declare
4215 U : constant VString := Node.VF.all;
4220 Get_String (U, S, L);
4222 while Cursor < Length loop
4223 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4226 Cursor := Cursor + 1;
4233 -- BreakX (string pointer case)
4235 when PC_BreakX_VP => declare
4236 U : constant VString := Node.VP.all;
4241 Get_String (U, S, L);
4243 while Cursor < Length loop
4244 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4247 Cursor := Cursor + 1;
4254 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4255 -- Structures". This node is the alternative that is stacked to
4256 -- skip past the break character and extend the break.
4259 Cursor := Cursor + 1;
4262 -- Character (one character string)
4266 and then Subject (Cursor + 1) = Node.Char
4268 Cursor := Cursor + 1;
4277 if Stack_Base = Stack_Init then
4280 -- End of recursive inner match. See separate section on
4281 -- handing of recursive pattern matches for details.
4284 Node := Stack (Stack_Base - 1).Node;
4294 -- Fence (built in pattern)
4297 Push (CP_Cancel'Access);
4300 -- Fence function node X. This is the node that gets control
4301 -- after a successful match of the fenced pattern.
4304 Stack_Ptr := Stack_Ptr + 1;
4305 Stack (Stack_Ptr).Cursor := Stack_Base;
4306 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4307 Stack_Base := Stack (Stack_Base).Cursor;
4310 -- Fence function node Y. This is the node that gets control on
4311 -- a failure that occurs after the fenced pattern has matched.
4313 -- Note: the Cursor at this stage is actually the inner stack
4314 -- base value. We don't reset this, but we do use it to strip
4315 -- off all the entries made by the fenced pattern.
4318 Stack_Ptr := Cursor - 2;
4321 -- Len (integer case)
4324 if Cursor + Node.Nat > Length then
4327 Cursor := Cursor + Node.Nat;
4331 -- Len (Integer function case)
4333 when PC_Len_NF => declare
4334 N : constant Natural := Node.NF.all;
4336 if Cursor + N > Length then
4339 Cursor := Cursor + N;
4344 -- Len (integer pointer case)
4347 if Cursor + Node.NP.all > Length then
4350 Cursor := Cursor + Node.NP.all;
4354 -- NotAny (one character case)
4356 when PC_NotAny_CH =>
4358 and then Subject (Cursor + 1) /= Node.Char
4360 Cursor := Cursor + 1;
4366 -- NotAny (character set case)
4368 when PC_NotAny_CS =>
4370 and then not Is_In (Subject (Cursor + 1), Node.CS)
4372 Cursor := Cursor + 1;
4378 -- NotAny (string function case)
4380 when PC_NotAny_VF => declare
4381 U : constant VString := Node.VF.all;
4386 Get_String (U, S, L);
4390 not Is_In (Subject (Cursor + 1), S (1 .. L))
4392 Cursor := Cursor + 1;
4399 -- NotAny (string pointer case)
4401 when PC_NotAny_VP => declare
4402 U : constant VString := Node.VP.all;
4407 Get_String (U, S, L);
4411 not Is_In (Subject (Cursor + 1), S (1 .. L))
4413 Cursor := Cursor + 1;
4420 -- NSpan (one character case)
4423 while Cursor < Length
4424 and then Subject (Cursor + 1) = Node.Char
4426 Cursor := Cursor + 1;
4431 -- NSpan (character set case)
4434 while Cursor < Length
4435 and then Is_In (Subject (Cursor + 1), Node.CS)
4437 Cursor := Cursor + 1;
4442 -- NSpan (string function case)
4444 when PC_NSpan_VF => declare
4445 U : constant VString := Node.VF.all;
4450 Get_String (U, S, L);
4452 while Cursor < Length
4453 and then Is_In (Subject (Cursor + 1), S (1 .. L))
4455 Cursor := Cursor + 1;
4461 -- NSpan (string pointer case)
4463 when PC_NSpan_VP => declare
4464 U : constant VString := Node.VP.all;
4469 Get_String (U, S, L);
4471 while Cursor < Length
4472 and then Is_In (Subject (Cursor + 1), S (1 .. L))
4474 Cursor := Cursor + 1;
4485 -- Pos (integer case)
4488 if Cursor = Node.Nat then
4494 -- Pos (Integer function case)
4496 when PC_Pos_NF => declare
4497 N : constant Natural := Node.NF.all;
4506 -- Pos (integer pointer case)
4509 if Cursor = Node.NP.all then
4515 -- Predicate function
4517 when PC_Pred_Func =>
4524 -- Region Enter. Initiate new pattern history stack region
4527 Stack (Stack_Ptr + 1).Cursor := Cursor;
4531 -- Region Remove node. This is the node stacked by an R_Enter.
4532 -- It removes the special format stack entry right underneath, and
4533 -- then restores the outer level stack base and signals failure.
4535 -- Note: the cursor value at this stage is actually the (negative)
4536 -- stack base value for the outer level.
4539 Stack_Base := Cursor;
4540 Stack_Ptr := Stack_Ptr - 1;
4543 -- Region restore node. This is the node stacked at the end of an
4544 -- inner level match. Its function is to restore the inner level
4545 -- region, so that alternatives in this region can be sought.
4547 -- Note: the Cursor at this stage is actually the negative of the
4548 -- inner stack base value, which we use to restore the inner region.
4550 when PC_R_Restore =>
4551 Stack_Base := Cursor;
4560 -- Initiate recursive match (pattern pointer case)
4563 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4566 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4567 raise Pattern_Stack_Overflow;
4569 Node := Node.PP.all.P;
4573 -- RPos (integer case)
4576 if Cursor = (Length - Node.Nat) then
4582 -- RPos (integer function case)
4584 when PC_RPos_NF => declare
4585 N : constant Natural := Node.NF.all;
4587 if Length - Cursor = N then
4594 -- RPos (integer pointer case)
4597 if Cursor = (Length - Node.NP.all) then
4603 -- RTab (integer case)
4606 if Cursor <= (Length - Node.Nat) then
4607 Cursor := Length - Node.Nat;
4613 -- RTab (integer function case)
4615 when PC_RTab_NF => declare
4616 N : constant Natural := Node.NF.all;
4618 if Length - Cursor >= N then
4619 Cursor := Length - N;
4626 -- RTab (integer pointer case)
4629 if Cursor <= (Length - Node.NP.all) then
4630 Cursor := Length - Node.NP.all;
4636 -- Cursor assignment
4639 Node.Var.all := Cursor;
4642 -- Span (one character case)
4644 when PC_Span_CH => declare
4650 and then Subject (P + 1) = Node.Char
4663 -- Span (character set case)
4665 when PC_Span_CS => declare
4671 and then Is_In (Subject (P + 1), Node.CS)
4684 -- Span (string function case)
4686 when PC_Span_VF => declare
4687 U : constant VString := Node.VF.all;
4693 Get_String (U, S, L);
4697 and then Is_In (Subject (P + 1), S (1 .. L))
4710 -- Span (string pointer case)
4712 when PC_Span_VP => declare
4713 U : constant VString := Node.VP.all;
4719 Get_String (U, S, L);
4723 and then Is_In (Subject (P + 1), S (1 .. L))
4736 -- String (two character case)
4739 if (Length - Cursor) >= 2
4740 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4742 Cursor := Cursor + 2;
4748 -- String (three character case)
4751 if (Length - Cursor) >= 3
4752 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4754 Cursor := Cursor + 3;
4760 -- String (four character case)
4763 if (Length - Cursor) >= 4
4764 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4766 Cursor := Cursor + 4;
4772 -- String (five character case)
4775 if (Length - Cursor) >= 5
4776 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4778 Cursor := Cursor + 5;
4784 -- String (six character case)
4787 if (Length - Cursor) >= 6
4788 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4790 Cursor := Cursor + 6;
4796 -- String (case of more than six characters)
4798 when PC_String => declare
4799 Len : constant Natural := Node.Str'Length;
4801 if (Length - Cursor) >= Len
4802 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4804 Cursor := Cursor + Len;
4811 -- String (function case)
4813 when PC_String_VF => declare
4814 U : constant VString := Node.VF.all;
4819 Get_String (U, S, L);
4821 if (Length - Cursor) >= L
4822 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
4824 Cursor := Cursor + L;
4831 -- String (pointer case)
4833 when PC_String_VP => declare
4834 U : constant VString := Node.VP.all;
4839 Get_String (U, S, L);
4841 if (Length - Cursor) >= L
4842 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
4844 Cursor := Cursor + L;
4857 -- Tab (integer case)
4860 if Cursor <= Node.Nat then
4867 -- Tab (integer function case)
4869 when PC_Tab_NF => declare
4870 N : constant Natural := Node.NF.all;
4880 -- Tab (integer pointer case)
4883 if Cursor <= Node.NP.all then
4884 Cursor := Node.NP.all;
4890 -- Unanchored movement
4892 when PC_Unanchored =>
4894 -- All done if we tried every position
4896 if Cursor > Length then
4899 -- Otherwise extend the anchor point, and restack ourself
4902 Cursor := Cursor + 1;
4907 -- Write immediate. This node performs the actual write
4909 when PC_Write_Imm =>
4912 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4916 -- Write on match. This node sets up for the eventual write
4918 when PC_Write_OnM =>
4919 Stack (Stack_Base - 1).Node := Node;
4920 Push (CP_Assign'Access);
4927 -- We are NOT allowed to fall though this case statement, since every
4928 -- match routine must end by executing a goto to the appropriate point
4929 -- in the finite state machine model.
4931 pragma Warnings (Off);
4933 pragma Warnings (On);
4940 -- Maintenance note: There is a LOT of code duplication between XMatch
4941 -- and XMatchD. This is quite intentional, the point is to avoid any
4942 -- unnecessary debugging overhead in the XMatch case, but this does mean
4943 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4944 -- any major changes, the proper approach is to delete XMatch, make the
4945 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4946 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4953 Start : out Natural;
4957 -- Pointer to current pattern node. Initialized from Pat_P, and then
4958 -- updated as the match proceeds through its constituent elements.
4960 Length : constant Natural := Subject'Length;
4961 -- Length of string (= Subject'Last, since Subject'First is always 1)
4963 Cursor : Integer := 0;
4964 -- If the value is non-negative, then this value is the index showing
4965 -- the current position of the match in the subject string. The next
4966 -- character to be matched is at Subject (Cursor + 1). Note that since
4967 -- our view of the subject string in XMatch always has a lower bound
4968 -- of one, regardless of original bounds, that this definition exactly
4969 -- corresponds to the cursor value as referenced by functions like Pos.
4971 -- If the value is negative, then this is a saved stack pointer,
4972 -- typically a base pointer of an inner or outer region. Cursor
4973 -- temporarily holds such a value when it is popped from the stack
4974 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4975 -- cursor value before the match proceeds (e.g. by propagating the
4976 -- failure and popping a "real" cursor value from the stack.
4978 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4979 -- Dummy pattern element used in the unanchored case
4981 Region_Level : Natural := 0;
4982 -- Keeps track of recursive region level. This is used only for
4983 -- debugging, it is the number of saved history stack base values.
4986 -- The pattern matching failure stack for this call to Match
4988 Stack_Ptr : Stack_Range;
4989 -- Current stack pointer. This points to the top element of the stack
4990 -- that is currently in use. At the outer level this is the special
4991 -- entry placed on the stack according to the anchor mode.
4993 Stack_Init : constant Stack_Range := Stack'First + 1;
4994 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4995 -- initial (Stack'First) element of the stack is not used so that
4996 -- when we pop the last element off, Stack_Ptr is still in range.
4998 Stack_Base : Stack_Range;
4999 -- This value is the stack base value, i.e. the stack pointer for the
5000 -- first history stack entry in the current stack region. See separate
5001 -- section on handling of recursive pattern matches.
5003 Assign_OnM : Boolean := False;
5004 -- Set True if assign-on-match or write-on-match operations may be
5005 -- present in the history stack, which must then be scanned on a
5006 -- successful match.
5008 procedure Dout (Str : String);
5009 -- Output string to standard error with bars indicating region level
5011 procedure Dout (Str : String; A : Character);
5012 -- Calls Dout with the string S ('A')
5014 procedure Dout (Str : String; A : Character_Set);
5015 -- Calls Dout with the string S ("A")
5017 procedure Dout (Str : String; A : Natural);
5018 -- Calls Dout with the string S (A)
5020 procedure Dout (Str : String; A : String);
5021 -- Calls Dout with the string S ("A")
5023 function Img (P : PE_Ptr) return String;
5024 -- Returns a string of the form #nnn where nnn is P.Index
5026 procedure Pop_Region;
5027 pragma Inline (Pop_Region);
5028 -- Used at the end of processing of an inner region. if the inner
5029 -- region left no stack entries, then all trace of it is removed.
5030 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
5031 -- handling of alternatives in the inner region.
5033 procedure Push (Node : PE_Ptr);
5034 pragma Inline (Push);
5035 -- Make entry in pattern matching stack with current cursor valeu
5037 procedure Push_Region;
5038 pragma Inline (Push_Region);
5039 -- This procedure makes a new region on the history stack. The
5040 -- caller first establishes the special entry on the stack, but
5041 -- does not push the stack pointer. Then this call stacks a
5042 -- PC_Remove_Region node, on top of this entry, using the cursor
5043 -- field of the PC_Remove_Region entry to save the outer level
5044 -- stack base value, and resets the stack base to point to this
5045 -- PC_Remove_Region node.
5051 procedure Dout (Str : String) is
5053 for J in 1 .. Region_Level loop
5060 procedure Dout (Str : String; A : Character) is
5062 Dout (Str & " ('" & A & "')");
5065 procedure Dout (Str : String; A : Character_Set) is
5067 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
5070 procedure Dout (Str : String; A : Natural) is
5072 Dout (Str & " (" & A & ')');
5075 procedure Dout (Str : String; A : String) is
5077 Dout (Str & " (" & Image (A) & ')');
5084 function Img (P : PE_Ptr) return String is
5086 return "#" & Integer (P.Index) & " ";
5093 procedure Pop_Region is
5095 Region_Level := Region_Level - 1;
5097 -- If nothing was pushed in the inner region, we can just get
5098 -- rid of it entirely, leaving no traces that it was ever there
5100 if Stack_Ptr = Stack_Base then
5101 Stack_Ptr := Stack_Base - 2;
5102 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
5104 -- If stuff was pushed in the inner region, then we have to
5105 -- push a PC_R_Restore node so that we properly handle possible
5106 -- rematches within the region.
5109 Stack_Ptr := Stack_Ptr + 1;
5110 Stack (Stack_Ptr).Cursor := Stack_Base;
5111 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5112 Stack_Base := Stack (Stack_Base).Cursor;
5120 procedure Push (Node : PE_Ptr) is
5122 Stack_Ptr := Stack_Ptr + 1;
5123 Stack (Stack_Ptr).Cursor := Cursor;
5124 Stack (Stack_Ptr).Node := Node;
5131 procedure Push_Region is
5133 Region_Level := Region_Level + 1;
5134 Stack_Ptr := Stack_Ptr + 2;
5135 Stack (Stack_Ptr).Cursor := Stack_Base;
5136 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5137 Stack_Base := Stack_Ptr;
5140 -- Start of processing for XMatchD
5144 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5145 Put ("--------------------------------------");
5147 for J in 1 .. Length loop
5152 Put_Line ("subject length = " & Length);
5154 if Pat_P = null then
5155 Uninitialized_Pattern;
5158 -- Check we have enough stack for this pattern. This check deals with
5159 -- every possibility except a match of a recursive pattern, where we
5160 -- make a check at each recursion level.
5162 if Pat_S >= Stack_Size - 1 then
5163 raise Pattern_Stack_Overflow;
5166 -- In anchored mode, the bottom entry on the stack is an abort entry
5168 if Anchored_Mode then
5169 Stack (Stack_Init).Node := CP_Cancel'Access;
5170 Stack (Stack_Init).Cursor := 0;
5172 -- In unanchored more, the bottom entry on the stack references
5173 -- the special pattern element PE_Unanchored, whose Pthen field
5174 -- points to the initial pattern element. The cursor value in this
5175 -- entry is the number of anchor moves so far.
5178 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5179 Stack (Stack_Init).Cursor := 0;
5182 Stack_Ptr := Stack_Init;
5183 Stack_Base := Stack_Ptr;
5188 -----------------------------------------
5189 -- Main Pattern Matching State Control --
5190 -----------------------------------------
5192 -- This is a state machine which uses gotos to change state. The
5193 -- initial state is Match, to initiate the matching of the first
5194 -- element, so the goto Match above starts the match. In the
5195 -- following descriptions, we indicate the global values that
5196 -- are relevant for the state transition.
5198 -- Come here if entire match fails
5201 Dout ("match fails");
5207 -- Come here if entire match succeeds
5209 -- Cursor current position in subject string
5212 Dout ("match succeeds");
5213 Start := Stack (Stack_Init).Cursor + 1;
5215 Dout ("first matched character index = " & Start);
5216 Dout ("last matched character index = " & Stop);
5217 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5219 -- Scan history stack for deferred assignments or writes
5222 for S in Stack'First .. Stack_Ptr loop
5223 if Stack (S).Node = CP_Assign'Access then
5225 Inner_Base : constant Stack_Range :=
5226 Stack (S + 1).Cursor;
5227 Special_Entry : constant Stack_Range :=
5229 Node_OnM : constant PE_Ptr :=
5230 Stack (Special_Entry).Node;
5231 Start : constant Natural :=
5232 Stack (Special_Entry).Cursor + 1;
5233 Stop : constant Natural := Stack (S).Cursor;
5236 if Node_OnM.Pcode = PC_Assign_OnM then
5237 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5239 (Img (Stack (S).Node) &
5240 "deferred assignment of " &
5241 Image (Subject (Start .. Stop)));
5243 elsif Node_OnM.Pcode = PC_Write_OnM then
5244 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5246 (Img (Stack (S).Node) &
5247 "deferred write of " &
5248 Image (Subject (Start .. Stop)));
5261 -- Come here if attempt to match current element fails
5263 -- Stack_Base current stack base
5264 -- Stack_Ptr current stack pointer
5267 Cursor := Stack (Stack_Ptr).Cursor;
5268 Node := Stack (Stack_Ptr).Node;
5269 Stack_Ptr := Stack_Ptr - 1;
5272 Dout ("failure, cursor reset to " & Cursor);
5277 -- Come here if attempt to match current element succeeds
5279 -- Cursor current position in subject string
5280 -- Node pointer to node successfully matched
5281 -- Stack_Base current stack base
5282 -- Stack_Ptr current stack pointer
5285 Dout ("success, cursor = " & Cursor);
5288 -- Come here to match the next pattern element
5290 -- Cursor current position in subject string
5291 -- Node pointer to node to be matched
5292 -- Stack_Base current stack base
5293 -- Stack_Ptr current stack pointer
5297 --------------------------------------------------
5298 -- Main Pattern Match Element Matching Routines --
5299 --------------------------------------------------
5301 -- Here is the case statement that processes the current node. The
5302 -- processing for each element does one of five things:
5304 -- goto Succeed to move to the successor
5305 -- goto Match_Succeed if the entire match succeeds
5306 -- goto Match_Fail if the entire match fails
5307 -- goto Fail to signal failure of current match
5309 -- Processing is NOT allowed to fall through
5316 Dout (Img (Node) & "matching Cancel");
5323 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5328 -- Any (one character case)
5331 Dout (Img (Node) & "matching Any", Node.Char);
5334 and then Subject (Cursor + 1) = Node.Char
5336 Cursor := Cursor + 1;
5342 -- Any (character set case)
5345 Dout (Img (Node) & "matching Any", Node.CS);
5348 and then Is_In (Subject (Cursor + 1), Node.CS)
5350 Cursor := Cursor + 1;
5356 -- Any (string function case)
5358 when PC_Any_VF => declare
5359 U : constant VString := Node.VF.all;
5364 Get_String (U, S, L);
5366 Dout (Img (Node) & "matching Any", S (1 .. L));
5369 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5371 Cursor := Cursor + 1;
5378 -- Any (string pointer case)
5380 when PC_Any_VP => declare
5381 U : constant VString := Node.VP.all;
5386 Get_String (U, S, L);
5387 Dout (Img (Node) & "matching Any", S (1 .. L));
5390 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5392 Cursor := Cursor + 1;
5399 -- Arb (initial match)
5402 Dout (Img (Node) & "matching Arb");
5410 Dout (Img (Node) & "extending Arb");
5412 if Cursor < Length then
5413 Cursor := Cursor + 1;
5420 -- Arbno_S (simple Arbno initialize). This is the node that
5421 -- initiates the match of a simple Arbno structure.
5425 "setting up Arbno alternative " & Img (Node.Alt));
5430 -- Arbno_X (Arbno initialize). This is the node that initiates
5431 -- the match of a complex Arbno structure.
5435 "setting up Arbno alternative " & Img (Node.Alt));
5440 -- Arbno_Y (Arbno rematch). This is the node that is executed
5441 -- following successful matching of one instance of a complex
5444 when PC_Arbno_Y => declare
5445 Null_Match : constant Boolean :=
5446 Cursor = Stack (Stack_Base - 1).Cursor;
5449 Dout (Img (Node) & "extending Arbno");
5452 -- If arbno extension matched null, then immediately fail
5455 Dout ("Arbno extension matched null, so fails");
5459 -- Here we must do a stack check to make sure enough stack
5460 -- is left. This check will happen once for each instance of
5461 -- the Arbno pattern that is matched. The Nat field of a
5462 -- PC_Arbno pattern contains the maximum stack entries needed
5463 -- for the Arbno with one instance and the successor pattern
5465 if Stack_Ptr + Node.Nat >= Stack'Last then
5466 raise Pattern_Stack_Overflow;
5472 -- Assign. If this node is executed, it means the assign-on-match
5473 -- or write-on-match operation will not happen after all, so we
5474 -- is propagate the failure, removing the PC_Assign node.
5477 Dout (Img (Node) & "deferred assign/write cancelled");
5480 -- Assign immediate. This node performs the actual assignment
5482 when PC_Assign_Imm =>
5484 (Img (Node) & "executing immediate assignment of " &
5485 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5488 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5492 -- Assign on match. This node sets up for the eventual assignment
5494 when PC_Assign_OnM =>
5495 Dout (Img (Node) & "registering deferred assignment");
5496 Stack (Stack_Base - 1).Node := Node;
5497 Push (CP_Assign'Access);
5505 Dout (Img (Node) & "matching or extending Bal");
5506 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5509 elsif Subject (Cursor + 1) = '(' then
5511 Paren_Count : Natural := 1;
5515 Cursor := Cursor + 1;
5517 if Cursor >= Length then
5520 elsif Subject (Cursor + 1) = '(' then
5521 Paren_Count := Paren_Count + 1;
5523 elsif Subject (Cursor + 1) = ')' then
5524 Paren_Count := Paren_Count - 1;
5525 exit when Paren_Count = 0;
5531 Cursor := Cursor + 1;
5535 -- Break (one character case)
5538 Dout (Img (Node) & "matching Break", Node.Char);
5540 while Cursor < Length loop
5541 if Subject (Cursor + 1) = Node.Char then
5544 Cursor := Cursor + 1;
5550 -- Break (character set case)
5553 Dout (Img (Node) & "matching Break", Node.CS);
5555 while Cursor < Length loop
5556 if Is_In (Subject (Cursor + 1), Node.CS) then
5559 Cursor := Cursor + 1;
5565 -- Break (string function case)
5567 when PC_Break_VF => declare
5568 U : constant VString := Node.VF.all;
5573 Get_String (U, S, L);
5574 Dout (Img (Node) & "matching Break", S (1 .. L));
5576 while Cursor < Length loop
5577 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5580 Cursor := Cursor + 1;
5587 -- Break (string pointer case)
5589 when PC_Break_VP => declare
5590 U : constant VString := Node.VP.all;
5595 Get_String (U, S, L);
5596 Dout (Img (Node) & "matching Break", S (1 .. L));
5598 while Cursor < Length loop
5599 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5602 Cursor := Cursor + 1;
5609 -- BreakX (one character case)
5611 when PC_BreakX_CH =>
5612 Dout (Img (Node) & "matching BreakX", Node.Char);
5614 while Cursor < Length loop
5615 if Subject (Cursor + 1) = Node.Char then
5618 Cursor := Cursor + 1;
5624 -- BreakX (character set case)
5626 when PC_BreakX_CS =>
5627 Dout (Img (Node) & "matching BreakX", Node.CS);
5629 while Cursor < Length loop
5630 if Is_In (Subject (Cursor + 1), Node.CS) then
5633 Cursor := Cursor + 1;
5639 -- BreakX (string function case)
5641 when PC_BreakX_VF => declare
5642 U : constant VString := Node.VF.all;
5647 Get_String (U, S, L);
5648 Dout (Img (Node) & "matching BreakX", S (1 .. L));
5650 while Cursor < Length loop
5651 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5654 Cursor := Cursor + 1;
5661 -- BreakX (string pointer case)
5663 when PC_BreakX_VP => declare
5664 U : constant VString := Node.VP.all;
5669 Get_String (U, S, L);
5670 Dout (Img (Node) & "matching BreakX", S (1 .. L));
5672 while Cursor < Length loop
5673 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5676 Cursor := Cursor + 1;
5683 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5684 -- Structures". This node is the alternative that is stacked
5685 -- to skip past the break character and extend the break.
5688 Dout (Img (Node) & "extending BreakX");
5689 Cursor := Cursor + 1;
5692 -- Character (one character string)
5695 Dout (Img (Node) & "matching '" & Node.Char & ''');
5698 and then Subject (Cursor + 1) = Node.Char
5700 Cursor := Cursor + 1;
5709 if Stack_Base = Stack_Init then
5710 Dout ("end of pattern");
5713 -- End of recursive inner match. See separate section on
5714 -- handing of recursive pattern matches for details.
5717 Dout ("terminating recursive match");
5718 Node := Stack (Stack_Base - 1).Node;
5726 Dout (Img (Node) & "matching Fail");
5729 -- Fence (built in pattern)
5732 Dout (Img (Node) & "matching Fence");
5733 Push (CP_Cancel'Access);
5736 -- Fence function node X. This is the node that gets control
5737 -- after a successful match of the fenced pattern.
5740 Dout (Img (Node) & "matching Fence function");
5741 Stack_Ptr := Stack_Ptr + 1;
5742 Stack (Stack_Ptr).Cursor := Stack_Base;
5743 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5744 Stack_Base := Stack (Stack_Base).Cursor;
5745 Region_Level := Region_Level - 1;
5748 -- Fence function node Y. This is the node that gets control on
5749 -- a failure that occurs after the fenced pattern has matched.
5751 -- Note: the Cursor at this stage is actually the inner stack
5752 -- base value. We don't reset this, but we do use it to strip
5753 -- off all the entries made by the fenced pattern.
5756 Dout (Img (Node) & "pattern matched by Fence caused failure");
5757 Stack_Ptr := Cursor - 2;
5760 -- Len (integer case)
5763 Dout (Img (Node) & "matching Len", Node.Nat);
5765 if Cursor + Node.Nat > Length then
5768 Cursor := Cursor + Node.Nat;
5772 -- Len (Integer function case)
5774 when PC_Len_NF => declare
5775 N : constant Natural := Node.NF.all;
5778 Dout (Img (Node) & "matching Len", N);
5780 if Cursor + N > Length then
5783 Cursor := Cursor + N;
5788 -- Len (integer pointer case)
5791 Dout (Img (Node) & "matching Len", Node.NP.all);
5793 if Cursor + Node.NP.all > Length then
5796 Cursor := Cursor + Node.NP.all;
5800 -- NotAny (one character case)
5802 when PC_NotAny_CH =>
5803 Dout (Img (Node) & "matching NotAny", Node.Char);
5806 and then Subject (Cursor + 1) /= Node.Char
5808 Cursor := Cursor + 1;
5814 -- NotAny (character set case)
5816 when PC_NotAny_CS =>
5817 Dout (Img (Node) & "matching NotAny", Node.CS);
5820 and then not Is_In (Subject (Cursor + 1), Node.CS)
5822 Cursor := Cursor + 1;
5828 -- NotAny (string function case)
5830 when PC_NotAny_VF => declare
5831 U : constant VString := Node.VF.all;
5836 Get_String (U, S, L);
5837 Dout (Img (Node) & "matching NotAny", S (1 .. L));
5841 not Is_In (Subject (Cursor + 1), S (1 .. L))
5843 Cursor := Cursor + 1;
5850 -- NotAny (string pointer case)
5852 when PC_NotAny_VP => declare
5853 U : constant VString := Node.VP.all;
5858 Get_String (U, S, L);
5859 Dout (Img (Node) & "matching NotAny", S (1 .. L));
5863 not Is_In (Subject (Cursor + 1), S (1 .. L))
5865 Cursor := Cursor + 1;
5872 -- NSpan (one character case)
5875 Dout (Img (Node) & "matching NSpan", Node.Char);
5877 while Cursor < Length
5878 and then Subject (Cursor + 1) = Node.Char
5880 Cursor := Cursor + 1;
5885 -- NSpan (character set case)
5888 Dout (Img (Node) & "matching NSpan", Node.CS);
5890 while Cursor < Length
5891 and then Is_In (Subject (Cursor + 1), Node.CS)
5893 Cursor := Cursor + 1;
5898 -- NSpan (string function case)
5900 when PC_NSpan_VF => declare
5901 U : constant VString := Node.VF.all;
5906 Get_String (U, S, L);
5907 Dout (Img (Node) & "matching NSpan", S (1 .. L));
5909 while Cursor < Length
5910 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5912 Cursor := Cursor + 1;
5918 -- NSpan (string pointer case)
5920 when PC_NSpan_VP => declare
5921 U : constant VString := Node.VP.all;
5926 Get_String (U, S, L);
5927 Dout (Img (Node) & "matching NSpan", S (1 .. L));
5929 while Cursor < Length
5930 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5932 Cursor := Cursor + 1;
5939 Dout (Img (Node) & "matching null");
5942 -- Pos (integer case)
5945 Dout (Img (Node) & "matching Pos", Node.Nat);
5947 if Cursor = Node.Nat then
5953 -- Pos (Integer function case)
5955 when PC_Pos_NF => declare
5956 N : constant Natural := Node.NF.all;
5959 Dout (Img (Node) & "matching Pos", N);
5968 -- Pos (integer pointer case)
5971 Dout (Img (Node) & "matching Pos", Node.NP.all);
5973 if Cursor = Node.NP.all then
5979 -- Predicate function
5981 when PC_Pred_Func =>
5982 Dout (Img (Node) & "matching predicate function");
5990 -- Region Enter. Initiate new pattern history stack region
5993 Dout (Img (Node) & "starting match of nested pattern");
5994 Stack (Stack_Ptr + 1).Cursor := Cursor;
5998 -- Region Remove node. This is the node stacked by an R_Enter.
5999 -- It removes the special format stack entry right underneath, and
6000 -- then restores the outer level stack base and signals failure.
6002 -- Note: the cursor value at this stage is actually the (negative)
6003 -- stack base value for the outer level.
6006 Dout ("failure, match of nested pattern terminated");
6007 Stack_Base := Cursor;
6008 Region_Level := Region_Level - 1;
6009 Stack_Ptr := Stack_Ptr - 1;
6012 -- Region restore node. This is the node stacked at the end of an
6013 -- inner level match. Its function is to restore the inner level
6014 -- region, so that alternatives in this region can be sought.
6016 -- Note: the Cursor at this stage is actually the negative of the
6017 -- inner stack base value, which we use to restore the inner region.
6019 when PC_R_Restore =>
6020 Dout ("failure, search for alternatives in nested pattern");
6021 Region_Level := Region_Level + 1;
6022 Stack_Base := Cursor;
6028 Dout (Img (Node) & "matching Rest");
6032 -- Initiate recursive match (pattern pointer case)
6035 Stack (Stack_Ptr + 1).Node := Node.Pthen;
6037 Dout (Img (Node) & "initiating recursive match");
6039 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
6040 raise Pattern_Stack_Overflow;
6042 Node := Node.PP.all.P;
6046 -- RPos (integer case)
6049 Dout (Img (Node) & "matching RPos", Node.Nat);
6051 if Cursor = (Length - Node.Nat) then
6057 -- RPos (integer function case)
6059 when PC_RPos_NF => declare
6060 N : constant Natural := Node.NF.all;
6063 Dout (Img (Node) & "matching RPos", N);
6065 if Length - Cursor = N then
6072 -- RPos (integer pointer case)
6075 Dout (Img (Node) & "matching RPos", Node.NP.all);
6077 if Cursor = (Length - Node.NP.all) then
6083 -- RTab (integer case)
6086 Dout (Img (Node) & "matching RTab", Node.Nat);
6088 if Cursor <= (Length - Node.Nat) then
6089 Cursor := Length - Node.Nat;
6095 -- RTab (integer function case)
6097 when PC_RTab_NF => declare
6098 N : constant Natural := Node.NF.all;
6101 Dout (Img (Node) & "matching RPos", N);
6103 if Length - Cursor >= N then
6104 Cursor := Length - N;
6111 -- RTab (integer pointer case)
6114 Dout (Img (Node) & "matching RPos", Node.NP.all);
6116 if Cursor <= (Length - Node.NP.all) then
6117 Cursor := Length - Node.NP.all;
6123 -- Cursor assignment
6126 Dout (Img (Node) & "matching Setcur");
6127 Node.Var.all := Cursor;
6130 -- Span (one character case)
6132 when PC_Span_CH => declare
6133 P : Natural := Cursor;
6136 Dout (Img (Node) & "matching Span", Node.Char);
6139 and then Subject (P + 1) = Node.Char
6152 -- Span (character set case)
6154 when PC_Span_CS => declare
6155 P : Natural := Cursor;
6158 Dout (Img (Node) & "matching Span", Node.CS);
6161 and then Is_In (Subject (P + 1), Node.CS)
6174 -- Span (string function case)
6176 when PC_Span_VF => declare
6177 U : constant VString := Node.VF.all;
6183 Get_String (U, S, L);
6184 Dout (Img (Node) & "matching Span", S (1 .. L));
6188 and then Is_In (Subject (P + 1), S (1 .. L))
6201 -- Span (string pointer case)
6203 when PC_Span_VP => declare
6204 U : constant VString := Node.VP.all;
6210 Get_String (U, S, L);
6211 Dout (Img (Node) & "matching Span", S (1 .. L));
6215 and then Is_In (Subject (P + 1), S (1 .. L))
6228 -- String (two character case)
6231 Dout (Img (Node) & "matching " & Image (Node.Str2));
6233 if (Length - Cursor) >= 2
6234 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6236 Cursor := Cursor + 2;
6242 -- String (three character case)
6245 Dout (Img (Node) & "matching " & Image (Node.Str3));
6247 if (Length - Cursor) >= 3
6248 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6250 Cursor := Cursor + 3;
6256 -- String (four character case)
6259 Dout (Img (Node) & "matching " & Image (Node.Str4));
6261 if (Length - Cursor) >= 4
6262 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6264 Cursor := Cursor + 4;
6270 -- String (five character case)
6273 Dout (Img (Node) & "matching " & Image (Node.Str5));
6275 if (Length - Cursor) >= 5
6276 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6278 Cursor := Cursor + 5;
6284 -- String (six character case)
6287 Dout (Img (Node) & "matching " & Image (Node.Str6));
6289 if (Length - Cursor) >= 6
6290 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6292 Cursor := Cursor + 6;
6298 -- String (case of more than six characters)
6300 when PC_String => declare
6301 Len : constant Natural := Node.Str'Length;
6304 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6306 if (Length - Cursor) >= Len
6307 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6309 Cursor := Cursor + Len;
6316 -- String (function case)
6318 when PC_String_VF => declare
6319 U : constant VString := Node.VF.all;
6324 Get_String (U, S, L);
6325 Dout (Img (Node) & "matching " & Image (S (1 .. L)));
6327 if (Length - Cursor) >= L
6328 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
6330 Cursor := Cursor + L;
6337 -- String (vstring pointer case)
6339 when PC_String_VP => declare
6340 U : constant VString := Node.VP.all;
6345 Get_String (U, S, L);
6346 Dout (Img (Node) & "matching " & Image (S (1 .. L)));
6348 if (Length - Cursor) >= L
6349 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
6351 Cursor := Cursor + L;
6361 Dout (Img (Node) & "matching Succeed");
6365 -- Tab (integer case)
6368 Dout (Img (Node) & "matching Tab", Node.Nat);
6370 if Cursor <= Node.Nat then
6377 -- Tab (integer function case)
6379 when PC_Tab_NF => declare
6380 N : constant Natural := Node.NF.all;
6383 Dout (Img (Node) & "matching Tab ", N);
6393 -- Tab (integer pointer case)
6396 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6398 if Cursor <= Node.NP.all then
6399 Cursor := Node.NP.all;
6405 -- Unanchored movement
6407 when PC_Unanchored =>
6408 Dout ("attempting to move anchor point");
6410 -- All done if we tried every position
6412 if Cursor > Length then
6415 -- Otherwise extend the anchor point, and restack ourself
6418 Cursor := Cursor + 1;
6423 -- Write immediate. This node performs the actual write
6425 when PC_Write_Imm =>
6426 Dout (Img (Node) & "executing immediate write of " &
6427 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6431 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6435 -- Write on match. This node sets up for the eventual write
6437 when PC_Write_OnM =>
6438 Dout (Img (Node) & "registering deferred write");
6439 Stack (Stack_Base - 1).Node := Node;
6440 Push (CP_Assign'Access);
6447 -- We are NOT allowed to fall though this case statement, since every
6448 -- match routine must end by executing a goto to the appropriate point
6449 -- in the finite state machine model.
6451 pragma Warnings (Off);
6453 pragma Warnings (On);
6456 end GNAT.Spitbol.Patterns;