1 /* gnu/regexp/REMatch.java
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package gnu.java.util.regex;
41 import gnu.java.lang.CPStringBuilder;
43 import java.io.Serializable;
/**
 * An instance of this class represents a match
 * completed by a gnu.regexp matching function. It can be used
 * to obtain relevant information about the location of a match
 * or submatch.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 */
53 public final class REMatch implements Serializable, Cloneable
55 private String matchedText;
56 private CharIndexed matchedCharIndexed;
58 // These variables are package scope for fast access within the engine
59 int eflags; // execution flags this match was made using
61 // Offset in source text where match was tried. This is zero-based;
62 // the actual position in the source text is given by (offset + anchor).
65 // Anchor position refers to the index into the source input
66 // at which the matching operation began.
67 // This is also useful for the ANCHORINDEX option.
70 // Package scope; used by RE.
71 int index; // used while matching to mark current match position in input
72 // start1[i] is set when the i-th subexp starts. And start1[i] is copied
73 // to start[i] when the i-th subexp ends. So start[i] keeps the previously
74 // assigned value while the i-th subexp is being processed. This makes
75 // backreference to the i-th subexp within the i-th subexp possible.
76 int[] start; // start positions (relative to offset) for each (sub)exp.
77 int[] start1; // start positions (relative to offset) for each (sub)exp.
78 int[] end; // end positions for the same
79 // start[i] == -1 or end[i] == -1 means that the start/end position is void.
80 // start[i] == p or end[i] == p where p < 0 and p != -1 means that
81 // the actual start/end position is (p+1). Start/end positions may
82 // become negative when the subexpression is in a RETokenLookBehind.
83 boolean empty; // empty string matched. This flag is used only within
86 BacktrackStack backtrackStack;
88 public Object clone ()
92 REMatch copy = (REMatch) super.clone ();
94 copy.start = (int[]) start.clone ();
95 copy.start1 = (int[]) start1.clone ();
96 copy.end = (int[]) end.clone ();
100 catch (CloneNotSupportedException e)
102 throw new Error (); // doesn't happen
// Makes this match take over state held by <code>other</code>.
// The assignments visible here copy references only: afterwards
// this.start1 is the very same array object as other.start1, and both
// matches refer to the same backtrackStack.
// NOTE(review): the embedded listing numbers jump 106 -> 109 -> 112, so
// lines of this method are not visible here (the braces and, presumably,
// further field assignments).  Confirm against the complete file before
// relying on this summary.
106 void assignFrom (REMatch other)
109 start1 = other.start1;
112 backtrackStack = other.backtrackStack;
// Creates a match record for an expression with <code>subs</code>
// subexpressions.
//   subs    number of subexpressions; each position array gets subs + 1
//           slots
//   anchor  stored in the anchor field
//   eflags  stored in the eflags field
// NOTE(review): listing line 122 is not visible here.  Freshly allocated
// int arrays hold 0, whereas -1 is the "void" marker used elsewhere in this
// class, so the hidden line presumably resets the arrays -- confirm.
115 REMatch (int subs, int anchor, int eflags)
117 start = new int[subs + 1];
118 start1 = new int[subs + 1];
119 end = new int[subs + 1];
120 this.anchor = anchor;
121 this.eflags = eflags;
// Completes this match against the input it was made on: records the
// matched text and the input, voids half-open groups and drops the
// backtracking state.
//   text  the input that was matched
// NOTE(review): listing lines 127 and 129 are not visible here; 129
// presumably declares the loop variable i, and whatever 127 does cannot be
// told from this view.
125 void finish (CharIndexed text)
128 CPStringBuilder sb = new CPStringBuilder ();
// Copy characters 0 .. end[0]-1 of the input; the result becomes matchedText.
130 for (i = 0; i < end[0]; i++)
131 sb.append (text.charAt (i));
132 matchedText = sb.toString ();
// Keep the input as well: toString(int) reads from it for groups that do
// not lie inside matchedText.
133 matchedCharIndexed = text;
134 for (i = 0; i < start.length; i++)
136 // If any subexpressions didn't terminate, they don't count
137 // TODO check if this code ever gets hit
// True when exactly one of start[i] / end[i] is the void marker -1.
// NOTE(review): the statements guarded by this test (listing lines
// 139-143) are not visible here.
138 if ((start[i] == -1) ^ (end[i] == -1))
// The backtracking state is dropped at the end of this method.
144 backtrackStack = null;
147 /** Clears the current match and moves the offset to the new index. */
// NOTE(review): listing lines 150-151 are not visible here; going by the
// comment above, one of them presumably stores the parameter into the
// offset field.  Note that the parameter shadows the field named index.
148 void clear (int index)
// Mark every group, group 0 included, as void: -1 in start, start1 and end.
152 for (int i = 0; i < start.length; i++)
154 start[i] = start1[i] = end[i] = -1;
// Drop any backtracking state.
156 backtrackStack = null;
160 * Returns the string matching the pattern. This makes it convenient
161 * to write code like the following:
164 * REMatch myMatch = myExpression.getMatch(myString);<br>
165 * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
168 public String toString ()
174 * Returns the index within the input text where the match in its entirety
177 public int getStartIndex ()
179 return offset + start[0];
183 * Returns the index within the input string where the match in
184 * its entirety ends. The return value is the next position after
185 * the end of the string; therefore, a match created by the
189 * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
191 * can be viewed (given that myMatch is not null) by creating
193 * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
194 * myMatch.getEndIndex());</code>
196 * But you can save yourself that work, since the <code>toString()</code>
197 * method (above) does exactly that for you.
199 public int getEndIndex ()
201 return offset + end[0];
205 * Returns the string matching the given subexpression. The subexpressions
206 * are indexed starting with one, not zero. That is, the subexpression
207 * identified by the first set of parentheses in a regular expression
208 * could be retrieved from an REMatch by calling match.toString(1).
210 * @param sub Index of the subexpression.
212 public String toString (int sub)
214 if ((sub >= start.length) || sub < 0)
215 throw new IndexOutOfBoundsException ("No group " + sub);
216 if (start[sub] == -1)
218 if (start[sub] >= 0 && end[sub] <= matchedText.length ())
219 return (matchedText.substring (start[sub], end[sub]));
222 // This case occurs with RETokenLookAhead or RETokenLookBehind.
223 CPStringBuilder sb = new CPStringBuilder ();
230 for (int i = start[0] + s; i < start[0] + e; i++)
231 sb.append (matchedCharIndexed.charAt (i));
232 return sb.toString ();
237 * Returns the index within the input string used to generate this match
238 * where subexpression number <i>sub</i> begins, or <code>-1</code> if
239 * the subexpression does not exist. The initial position is zero.
241 * @param sub Subexpression index
242 * @deprecated Use getStartIndex(int) instead.
244 public int getSubStartIndex (int sub)
246 if (sub >= start.length)
249 return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
253 * Returns the index within the input string used to generate this match
254 * where subexpression number <i>sub</i> begins, or <code>-1</code> if
255 * the subexpression does not exist. The initial position is zero.
257 * @param sub Subexpression index
258 * @since gnu.regexp 1.1.0
260 public int getStartIndex (int sub)
262 if (sub >= start.length)
265 return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
269 * Returns the index within the input string used to generate this match
270 * where subexpression number <i>sub</i> ends, or <code>-1</code> if
271 * the subexpression does not exist. The initial position is zero.
273 * @param sub Subexpression index
274 * @deprecated Use getEndIndex(int) instead
276 public int getSubEndIndex (int sub)
278 if (sub >= start.length)
281 return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
285 * Returns the index within the input string used to generate this match
286 * where subexpression number <i>sub</i> ends, or <code>-1</code> if
287 * the subexpression does not exist. The initial position is zero.
289 * @param sub Subexpression index
291 public int getEndIndex (int sub)
293 if (sub >= start.length)
296 return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1;
300 * Substitute the results of this match to create a new string.
301 * This is patterned after PERL, so the tokens to watch out for are
302 * <code>$0</code> through <code>$9</code>. <code>$0</code> matches
303 * the full substring matched; <code>$<i>n</i></code> matches
304 * subexpression number <i>n</i>.
305 * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
306 * if such subexpressions exist.
308 * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
310 public String substituteInto (String input)
312 // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
313 CPStringBuilder output = new CPStringBuilder ();
315 for (pos = 0; pos < input.length () - 1; pos++)
317 if ((input.charAt (pos) == '$')
318 && (Character.isDigit (input.charAt (pos + 1))))
320 int val = Character.digit (input.charAt (++pos), 10);
322 while (pos1 < input.length () &&
323 Character.isDigit (input.charAt (pos1)))
326 val * 10 + Character.digit (input.charAt (pos1), 10);
327 if (val1 >= start.length)
334 if (val < start.length)
336 output.append (toString (val));
340 output.append (input.charAt (pos));
342 if (pos < input.length ())
343 output.append (input.charAt (pos));
344 return output.toString ();
347 /* The following are used for debugging purpose
348 public static String d(REMatch m) {
349 if (m == null) return "null";
350 else return "[" + m.index + "]";
353 public String substringUptoIndex(CharIndexed input) {
354 StringBuffer sb = new StringBuffer();
355 for (int i = 0; i < index; i++) {
356 sb.append(input.charAt(i));
358 return sb.toString();