OSDN Git Service

2011-03-14 Benjamin Kosnik <bkoz@redhat.com>
[pf3gnuchains/gcc-fork.git] / libstdc++-v3 / include / bits / regex_compiler.h
1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26  *  @file bits/regex_compiler.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __regex
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37   struct _Scanner_base
38   {
39     typedef unsigned int _StateT;
40
41     static constexpr _StateT _S_state_at_start    = 1 << 0;
42     static constexpr _StateT _S_state_in_brace    = 1 << 2;
43     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
44
45     virtual ~_Scanner_base() { };
46   };
47
48   //
49   // @brief Scans an input range for regex tokens.
50   //
51   // The %_Scanner class interprets the regular expression pattern in the input
52   // range passed to its constructor as a sequence of parse tokens passed to
53   // the regular expression compiler.  The sequence of tokens provided depends
54   // on the flag settings passed to the constructor:  different regular
55   // expression grammars will interpret the same input pattern in
56   // syntactically different ways.
57   //
58   template<typename _InputIterator>
59     class _Scanner: public _Scanner_base
60     {
61     public:
62       typedef _InputIterator                                        _IteratorT;
63       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
64       typedef std::basic_string<_CharT>                             _StringT;
65       typedef regex_constants::syntax_option_type                   _FlagT;
66       typedef const std::ctype<_CharT>                              _CtypeT;
67
68       // Token types returned from the scanner.
69       enum _TokenT
70       {
71         _S_token_anychar,
72         _S_token_backref,
73         _S_token_bracket_begin,
74         _S_token_bracket_end,
75         _S_token_inverse_class,
76         _S_token_char_class_name,
77         _S_token_closure0,
78         _S_token_closure1,
79         _S_token_collelem_multi,
80         _S_token_collelem_single,
81         _S_token_collsymbol,
82         _S_token_comma,
83         _S_token_dash,
84         _S_token_dup_count,
85         _S_token_eof,
86         _S_token_equiv_class_name,
87         _S_token_interval_begin,
88         _S_token_interval_end,
89         _S_token_line_begin,
90         _S_token_line_end,
91         _S_token_opt,
92         _S_token_or,
93         _S_token_ord_char,
94         _S_token_quoted_char,
95         _S_token_subexpr_begin,
96         _S_token_subexpr_end,
97         _S_token_word_begin,
98         _S_token_word_end,
99         _S_token_unknown
100       };
101
102     public:
103       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
104                std::locale __loc)
105       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
106         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
107       { _M_advance(); }
108
109       void
110       _M_advance();
111
112       _TokenT
113       _M_token() const
114       { return _M_curToken; }
115
116       const _StringT&
117       _M_value() const
118       { return _M_curValue; }
119
120 #ifdef _GLIBCXX_DEBUG
121       std::ostream&
122       _M_print(std::ostream&);
123 #endif
124
125     private:
126       void
127       _M_eat_escape();
128
129       void
130       _M_scan_in_brace();
131
132       void
133       _M_scan_in_bracket();
134
135       void
136       _M_eat_charclass();
137
138       void
139       _M_eat_equivclass();
140
141       void
142       _M_eat_collsymbol();
143
144     private:
145       _IteratorT  _M_current;
146       _IteratorT  _M_end;
147       _FlagT      _M_flags;
148       _CtypeT&    _M_ctype;
149       _TokenT     _M_curToken;
150       _StringT    _M_curValue;
151       _StateT     _M_state;
152     };
153
154   template<typename _InputIterator>
155     void
156     _Scanner<_InputIterator>::
157     _M_advance()
158     {
159       if (_M_current == _M_end)
160         {
161           _M_curToken = _S_token_eof;
162           return;
163         }
164
165       _CharT __c = *_M_current;
166       if (_M_state & _S_state_in_bracket)
167         {
168           _M_scan_in_bracket();
169           return;
170         }
171       if (_M_state & _S_state_in_brace)
172         {
173           _M_scan_in_brace();
174           return;
175         }
176 #if 0
177       // TODO: re-enable line anchors when _M_assertion is implemented.
178       // See PR libstdc++/47724
179       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
180         {
181           _M_curToken = _S_token_line_begin;
182           ++_M_current;
183           return;
184         }
185       else if (__c == _M_ctype.widen('$'))
186         {
187           _M_curToken = _S_token_line_end;
188           ++_M_current;
189           return;
190         }
191 #endif
192       else if (__c == _M_ctype.widen('.'))
193         {
194           _M_curToken = _S_token_anychar;
195           ++_M_current;
196           return;
197         }
198       else if (__c == _M_ctype.widen('*'))
199         {
200           _M_curToken = _S_token_closure0;
201           ++_M_current;
202           return;
203         }
204       else if (__c == _M_ctype.widen('+'))
205         {
206           _M_curToken = _S_token_closure1;
207           ++_M_current;
208           return;
209         }
210       else if (__c == _M_ctype.widen('|'))
211         {
212           _M_curToken = _S_token_or;
213           ++_M_current;
214           return;
215         }
216       else if (__c == _M_ctype.widen('['))
217         {
218           _M_curToken = _S_token_bracket_begin;
219           _M_state |= (_S_state_in_bracket | _S_state_at_start);
220           ++_M_current;
221           return;
222         }
223       else if (__c == _M_ctype.widen('\\'))
224         {
225           _M_eat_escape();
226           return;
227         }
228       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
229         {
230           if (__c == _M_ctype.widen('('))
231             {
232               _M_curToken = _S_token_subexpr_begin;
233               ++_M_current;
234               return;
235             }
236           else if (__c == _M_ctype.widen(')'))
237             {
238               _M_curToken = _S_token_subexpr_end;
239               ++_M_current;
240               return;
241             }
242           else if (__c == _M_ctype.widen('{'))
243             {
244               _M_curToken = _S_token_interval_begin;
245               _M_state |= _S_state_in_brace;
246               ++_M_current;
247               return;
248             }
249         }
250
251       _M_curToken = _S_token_ord_char;
252       _M_curValue.assign(1, __c);
253       ++_M_current;
254     }
255
256
257   template<typename _InputIterator>
258     void
259     _Scanner<_InputIterator>::
260     _M_scan_in_brace()
261     {
262       if (_M_ctype.is(_CtypeT::digit, *_M_current))
263         {
264           _M_curToken = _S_token_dup_count;
265           _M_curValue.assign(1, *_M_current);
266           ++_M_current;
267           while (_M_current != _M_end
268                  && _M_ctype.is(_CtypeT::digit, *_M_current))
269             {
270               _M_curValue += *_M_current;
271               ++_M_current;
272             }
273           return;
274         }
275       else if (*_M_current == _M_ctype.widen(','))
276         {
277           _M_curToken = _S_token_comma;
278           ++_M_current;
279           return;
280         }
281       if (_M_flags & (regex_constants::basic | regex_constants::grep))
282         {
283           if (*_M_current == _M_ctype.widen('\\'))
284             _M_eat_escape();
285         }
286       else 
287         {
288           if (*_M_current == _M_ctype.widen('}'))
289             {
290               _M_curToken = _S_token_interval_end;
291               _M_state &= ~_S_state_in_brace;
292               ++_M_current;
293               return;
294             }
295         }
296     }
297
298   template<typename _InputIterator>
299     void
300     _Scanner<_InputIterator>::
301     _M_scan_in_bracket()
302     {
303       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
304         {
305           _M_curToken = _S_token_inverse_class;
306           _M_state &= ~_S_state_at_start;
307           ++_M_current;
308           return;
309         }
310       else if (*_M_current == _M_ctype.widen('['))
311         {
312           ++_M_current;
313           if (_M_current == _M_end)
314             {
315               _M_curToken = _S_token_eof;
316               return;
317             }
318
319           if (*_M_current == _M_ctype.widen('.'))
320             {
321               _M_curToken = _S_token_collsymbol;
322               _M_eat_collsymbol();
323               return;
324             }
325           else if (*_M_current == _M_ctype.widen(':'))
326             {
327               _M_curToken = _S_token_char_class_name;
328               _M_eat_charclass();
329               return;
330             }
331           else if (*_M_current == _M_ctype.widen('='))
332             {
333               _M_curToken = _S_token_equiv_class_name;
334               _M_eat_equivclass();
335               return;
336             }
337         }
338       else if (*_M_current == _M_ctype.widen('-'))
339         {
340           _M_curToken = _S_token_dash;
341           ++_M_current;
342           return;
343         }
344       else if (*_M_current == _M_ctype.widen(']'))
345         {
346           if (!(_M_flags & regex_constants::ECMAScript)
347               || !(_M_state & _S_state_at_start))
348             {
349               // special case: only if  _not_ chr first after
350               // '[' or '[^' and if not ECMAscript
351               _M_curToken = _S_token_bracket_end;
352               ++_M_current;
353               return;
354             }
355         }
356       _M_curToken = _S_token_collelem_single;
357       _M_curValue.assign(1, *_M_current);
358       ++_M_current;
359     }
360
361   template<typename _InputIterator>
362     void
363     _Scanner<_InputIterator>::
364     _M_eat_escape()
365     {
366       ++_M_current;
367       if (_M_current == _M_end)
368         {
369           _M_curToken = _S_token_eof;
370           return;
371         }
372       _CharT __c = *_M_current;
373       ++_M_current;
374
375       if (__c == _M_ctype.widen('('))
376         {
377           if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
378             {
379               _M_curToken = _S_token_ord_char;
380               _M_curValue.assign(1, __c);
381             }
382           else
383             _M_curToken = _S_token_subexpr_begin;
384         }
385       else if (__c == _M_ctype.widen(')'))
386         {
387           if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
388             {
389               _M_curToken = _S_token_ord_char;
390               _M_curValue.assign(1, __c);
391             }
392           else
393             _M_curToken = _S_token_subexpr_end;
394         }
395       else if (__c == _M_ctype.widen('{'))
396         {
397           if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
398             {
399               _M_curToken = _S_token_ord_char;
400               _M_curValue.assign(1, __c);
401             }
402           else
403             {
404               _M_curToken = _S_token_interval_begin;
405               _M_state |= _S_state_in_brace;
406             }
407         }
408       else if (__c == _M_ctype.widen('}'))
409         {
410           if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
411             {
412               _M_curToken = _S_token_ord_char;
413               _M_curValue.assign(1, __c);
414             }
415           else
416             {
417               if (!(_M_state && _S_state_in_brace))
418                 __throw_regex_error(regex_constants::error_badbrace);
419               _M_state &= ~_S_state_in_brace;
420               _M_curToken = _S_token_interval_end;
421             }
422         }
423       else if (__c == _M_ctype.widen('x'))
424         {
425           ++_M_current;
426           if (_M_current == _M_end)
427             {
428               _M_curToken = _S_token_eof;
429               return;
430             }
431           if (_M_ctype.is(_CtypeT::digit, *_M_current))
432             {
433               _M_curValue.assign(1, *_M_current);
434               ++_M_current;
435               if (_M_current == _M_end)
436                 {
437                   _M_curToken = _S_token_eof;
438                   return;
439                 }
440               if (_M_ctype.is(_CtypeT::digit, *_M_current))
441                 {
442                   _M_curValue += *_M_current;
443                   ++_M_current;
444                   return;
445                 }
446             }
447         }
448       else if (__c == _M_ctype.widen('^')
449                || __c == _M_ctype.widen('.')
450                || __c == _M_ctype.widen('*')
451                || __c == _M_ctype.widen('$')
452                || __c == _M_ctype.widen('\\'))
453         {
454           _M_curToken = _S_token_ord_char;
455           _M_curValue.assign(1, __c);
456         }
457       else if (_M_ctype.is(_CtypeT::digit, __c))
458         {
459           _M_curToken = _S_token_backref;
460           _M_curValue.assign(1, __c);
461         }
462       else
463         __throw_regex_error(regex_constants::error_escape);
464     }
465
466
467   // Eats a character class or throwns an exception.
468   // current point to ':' delimiter on entry, char after ']' on return
469   template<typename _InputIterator>
470     void
471     _Scanner<_InputIterator>::
472     _M_eat_charclass()
473     {
474       ++_M_current; // skip ':'
475       if (_M_current == _M_end)
476         __throw_regex_error(regex_constants::error_ctype);
477       for (_M_curValue.clear();
478            _M_current != _M_end && *_M_current != _M_ctype.widen(':');
479            ++_M_current)
480         _M_curValue += *_M_current;
481       if (_M_current == _M_end)
482         __throw_regex_error(regex_constants::error_ctype);
483       ++_M_current; // skip ':'
484       if (*_M_current != _M_ctype.widen(']'))
485         __throw_regex_error(regex_constants::error_ctype);
486       ++_M_current; // skip ']'
487     }
488
489
490   template<typename _InputIterator>
491     void
492     _Scanner<_InputIterator>::
493     _M_eat_equivclass()
494     {
495       ++_M_current; // skip '='
496       if (_M_current == _M_end)
497         __throw_regex_error(regex_constants::error_collate);
498       for (_M_curValue.clear();
499            _M_current != _M_end && *_M_current != _M_ctype.widen('=');
500            ++_M_current)
501         _M_curValue += *_M_current;
502       if (_M_current == _M_end)
503         __throw_regex_error(regex_constants::error_collate);
504       ++_M_current; // skip '='
505       if (*_M_current != _M_ctype.widen(']'))
506         __throw_regex_error(regex_constants::error_collate);
507       ++_M_current; // skip ']'
508     }
509
510
511   template<typename _InputIterator>
512     void
513     _Scanner<_InputIterator>::
514     _M_eat_collsymbol()
515     {
516       ++_M_current; // skip '.'
517       if (_M_current == _M_end)
518         __throw_regex_error(regex_constants::error_collate);
519       for (_M_curValue.clear();
520            _M_current != _M_end && *_M_current != _M_ctype.widen('.');
521            ++_M_current)
522         _M_curValue += *_M_current;
523       if (_M_current == _M_end)
524         __throw_regex_error(regex_constants::error_collate);
525       ++_M_current; // skip '.'
526       if (*_M_current != _M_ctype.widen(']'))
527         __throw_regex_error(regex_constants::error_collate);
528       ++_M_current; // skip ']'
529     }
530
531 #ifdef _GLIBCXX_DEBUG
532   template<typename _InputIterator>
533     std::ostream&
534     _Scanner<_InputIterator>::
535     _M_print(std::ostream& ostr)
536     {
537       switch (_M_curToken)
538       {
539         case _S_token_anychar:
540           ostr << "any-character\n";
541           break;
542         case _S_token_backref:
543           ostr << "backref\n";
544           break;
545         case _S_token_bracket_begin:
546           ostr << "bracket-begin\n";
547           break;
548         case _S_token_bracket_end:
549           ostr << "bracket-end\n";
550           break;
551         case _S_token_char_class_name:
552           ostr << "char-class-name \"" << _M_curValue << "\"\n";
553           break;
554         case _S_token_closure0:
555           ostr << "closure0\n";
556           break;
557         case _S_token_closure1:
558           ostr << "closure1\n";
559           break;
560         case _S_token_collelem_multi:
561           ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
562           break;
563         case _S_token_collelem_single:
564           ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
565           break;
566         case _S_token_collsymbol:
567           ostr << "collsymbol \"" << _M_curValue << "\"\n";
568           break;
569         case _S_token_comma:
570           ostr << "comma\n";
571           break;
572         case _S_token_dash:
573           ostr << "dash\n";
574           break;
575         case _S_token_dup_count:
576           ostr << "dup count: " << _M_curValue << "\n";
577           break;
578         case _S_token_eof:
579           ostr << "EOF\n";
580           break;
581         case _S_token_equiv_class_name:
582           ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
583           break;
584         case _S_token_interval_begin:
585           ostr << "interval begin\n";
586           break;
587         case _S_token_interval_end:
588           ostr << "interval end\n";
589           break;
590         case _S_token_line_begin:
591           ostr << "line begin\n";
592           break;
593         case _S_token_line_end:
594           ostr << "line end\n";
595           break;
596         case _S_token_opt:
597           ostr << "opt\n";
598           break;
599         case _S_token_or:
600           ostr << "or\n";
601           break;
602         case _S_token_ord_char:
603           ostr << "ordinary character: \"" << _M_value() << "\"\n";
604           break;
605         case _S_token_quoted_char:
606           ostr << "quoted char\n";
607           break;
608         case _S_token_subexpr_begin:
609           ostr << "subexpr begin\n";
610           break;
611         case _S_token_subexpr_end:
612           ostr << "subexpr end\n";
613           break;
614         case _S_token_word_begin:
615           ostr << "word begin\n";
616           break;
617         case _S_token_word_end:
618           ostr << "word end\n";
619           break;
620         case _S_token_unknown:
621           ostr << "-- unknown token --\n";
622           break;
623       }
624       return ostr;
625     }
626 #endif
627
628   // Builds an NFA from an input iterator interval.
629   template<typename _InIter, typename _TraitsT>
630     class _Compiler
631     {
632     public:
633       typedef _InIter                                            _IterT;
634       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
635       typedef std::basic_string<_CharT>                          _StringT;
636       typedef regex_constants::syntax_option_type                _FlagT;
637
638     public:
639       _Compiler(const _InIter& __b, const _InIter& __e,
640                 _TraitsT& __traits, _FlagT __flags);
641
642       const _Nfa&
643       _M_nfa() const
644       { return _M_state_store; }
645
646     private:
647       typedef _Scanner<_InIter>                              _ScannerT;
648       typedef typename _ScannerT::_TokenT                    _TokenT;
649       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
650       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
651
652       // accepts a specific token or returns false.
653       bool
654       _M_match_token(_TokenT __token);
655
656       void
657       _M_disjunction();
658
659       bool
660       _M_alternative();
661
662       bool
663       _M_term();
664
665       bool
666       _M_assertion();
667
668       bool
669       _M_quantifier();
670
671       bool
672       _M_atom();
673
674       bool
675       _M_bracket_expression();
676
677       bool
678       _M_bracket_list(_RMatcherT& __matcher);
679
680       bool
681       _M_follow_list(_RMatcherT& __matcher);
682
683       bool
684       _M_follow_list2(_RMatcherT& __matcher);
685
686       bool
687       _M_expression_term(_RMatcherT& __matcher);
688
689       bool
690       _M_range_expression(_RMatcherT& __matcher);
691
692       bool
693       _M_start_range(_RMatcherT& __matcher);
694
695       bool
696       _M_collating_symbol(_RMatcherT& __matcher);
697
698       bool
699       _M_equivalence_class(_RMatcherT& __matcher);
700
701       bool
702       _M_character_class(_RMatcherT& __matcher);
703
704       int
705       _M_cur_int_value(int __radix);
706
707     private:
708       _TraitsT&      _M_traits;
709       _ScannerT      _M_scanner;
710       _StringT       _M_cur_value;
711       _Nfa           _M_state_store;
712       _StackT        _M_stack;
713     };
714
715   template<typename _InIter, typename _TraitsT>
716     _Compiler<_InIter, _TraitsT>::
717     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
718               _Compiler<_InIter, _TraitsT>::_FlagT __flags)
719     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
720       _M_state_store(__flags)
721     {
722       typedef _StartTagger<_InIter, _TraitsT> _Start;
723       typedef _EndTagger<_InIter, _TraitsT> _End;
724
725       _StateSeq __r(_M_state_store,
726                     _M_state_store._M_insert_subexpr_begin(_Start(0)));
727       _M_disjunction();
728       if (!_M_stack.empty())
729         {
730           __r._M_append(_M_stack.top());
731           _M_stack.pop();
732         }
733       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
734       __r._M_append(_M_state_store._M_insert_accept());
735     }
736
737   template<typename _InIter, typename _TraitsT>
738     bool
739     _Compiler<_InIter, _TraitsT>::
740     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
741     { 
742       if (token == _M_scanner._M_token())
743         {
744           _M_cur_value = _M_scanner._M_value();
745           _M_scanner._M_advance();
746           return true;
747         }
748       return false;
749     }
750
751   template<typename _InIter, typename _TraitsT>
752     void
753     _Compiler<_InIter, _TraitsT>::
754     _M_disjunction()
755     {
756       this->_M_alternative();
757       if (_M_match_token(_ScannerT::_S_token_or))
758         {
759           _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
760           this->_M_disjunction();
761           _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
762           _M_stack.push(_StateSeq(__alt1, __alt2));
763         }
764     }
765
766   template<typename _InIter, typename _TraitsT>
767     bool
768     _Compiler<_InIter, _TraitsT>::
769     _M_alternative()
770     {
771       if (this->_M_term())
772         {
773           _StateSeq __re = _M_stack.top(); _M_stack.pop();
774           this->_M_alternative();
775           if (!_M_stack.empty())
776             {
777               __re._M_append(_M_stack.top());
778               _M_stack.pop();
779             }
780           _M_stack.push(__re);
781           return true;
782         }
783       return false;
784     }
785
786   template<typename _InIter, typename _TraitsT>
787     bool
788     _Compiler<_InIter, _TraitsT>::
789     _M_term()
790     {
791       if (this->_M_assertion())
792         return true;
793       if (this->_M_atom())
794         {
795           this->_M_quantifier();
796           return true;
797         }
798       return false;
799     }
800
801   template<typename _InIter, typename _TraitsT>
802     bool
803     _Compiler<_InIter, _TraitsT>::
804     _M_assertion()
805     {
806       if (_M_match_token(_ScannerT::_S_token_line_begin))
807         {
808           // __m.push(_Matcher::_S_opcode_line_begin);
809           return true;
810         }
811       if (_M_match_token(_ScannerT::_S_token_line_end))
812         {
813           // __m.push(_Matcher::_S_opcode_line_end);
814           return true;
815         }
816       if (_M_match_token(_ScannerT::_S_token_word_begin))
817         {
818           // __m.push(_Matcher::_S_opcode_word_begin);
819           return true;
820         }
821       if (_M_match_token(_ScannerT::_S_token_word_end))
822         {
823           // __m.push(_Matcher::_S_opcode_word_end);
824           return true;
825         }
826       return false;
827     }
828
829   template<typename _InIter, typename _TraitsT>
830     bool
831     _Compiler<_InIter, _TraitsT>::
832     _M_quantifier()
833     {
834       if (_M_match_token(_ScannerT::_S_token_closure0))
835         {
836           if (_M_stack.empty())
837             __throw_regex_error(regex_constants::error_badrepeat);
838           _StateSeq __r(_M_stack.top(), -1);
839           __r._M_append(__r._M_front());
840           _M_stack.pop();
841           _M_stack.push(__r);
842           return true;
843         }
844       if (_M_match_token(_ScannerT::_S_token_closure1))
845         {
846           if (_M_stack.empty())
847             __throw_regex_error(regex_constants::error_badrepeat);
848           _StateSeq __r(_M_state_store,
849                         _M_state_store.
850                         _M_insert_alt(_S_invalid_state_id,
851                                       _M_stack.top()._M_front()));
852           _M_stack.top()._M_append(__r);
853           return true;
854         }
855       if (_M_match_token(_ScannerT::_S_token_opt))
856         {
857           if (_M_stack.empty())
858           __throw_regex_error(regex_constants::error_badrepeat);
859           _StateSeq __r(_M_stack.top(), -1);
860           _M_stack.pop();
861           _M_stack.push(__r);
862           return true;
863         }
864       if (_M_match_token(_ScannerT::_S_token_interval_begin))
865         {
866           if (_M_stack.empty())
867             __throw_regex_error(regex_constants::error_badrepeat);
868           if (!_M_match_token(_ScannerT::_S_token_dup_count))
869             __throw_regex_error(regex_constants::error_badbrace);
870           _StateSeq __r(_M_stack.top());
871           int __min_rep = _M_cur_int_value(10);
872           for (int __i = 1; __i < __min_rep; ++__i)
873             _M_stack.top()._M_append(__r._M_clone()); 
874           if (_M_match_token(_ScannerT::_S_token_comma))
875             if (_M_match_token(_ScannerT::_S_token_dup_count))
876               {
877                 int __n = _M_cur_int_value(10) - __min_rep;
878                 if (__n < 0)
879                   __throw_regex_error(regex_constants::error_badbrace);
880                 for (int __i = 0; __i < __n; ++__i)
881                   {
882                     _StateSeq __r(_M_state_store,
883                                   _M_state_store.
884                                   _M_insert_alt(_S_invalid_state_id,
885                                                 _M_stack.top()._M_front()));
886                     _M_stack.top()._M_append(__r);
887                   }
888               }
889             else
890               {
891                 _StateSeq __r(_M_stack.top(), -1);
892                 __r._M_push_back(__r._M_front());
893                 _M_stack.pop();
894                 _M_stack.push(__r);
895               }
896           if (!_M_match_token(_ScannerT::_S_token_interval_end))
897             __throw_regex_error(regex_constants::error_brace);
898           return true;
899         }
900       return false;
901     }
902
903   template<typename _InIter, typename _TraitsT>
904     bool
905     _Compiler<_InIter, _TraitsT>::
906     _M_atom()
907     {
908       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
909       typedef _StartTagger<_InIter, _TraitsT> _Start;
910       typedef _EndTagger<_InIter, _TraitsT> _End;
911
912       if (_M_match_token(_ScannerT::_S_token_anychar))
913         {
914           _M_stack.push(_StateSeq(_M_state_store,
915                                   _M_state_store._M_insert_matcher
916                                   (_AnyMatcher)));
917           return true;
918         }
919       if (_M_match_token(_ScannerT::_S_token_ord_char))
920         {
921           _M_stack.push(_StateSeq(_M_state_store,
922                                   _M_state_store._M_insert_matcher
923                                   (_CMatcher(_M_cur_value[0], _M_traits))));
924           return true;
925         }
926       if (_M_match_token(_ScannerT::_S_token_quoted_char))
927         {
928           // note that in the ECMA grammar, this case covers backrefs.
929           _M_stack.push(_StateSeq(_M_state_store,
930                                   _M_state_store._M_insert_matcher
931                                   (_CMatcher(_M_cur_value[0], _M_traits))));
932           return true;
933         }
934       if (_M_match_token(_ScannerT::_S_token_backref))
935         {
936           // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
937           return true;
938         }
939       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
940         {
941           int __mark = _M_state_store._M_sub_count();
942           _StateSeq __r(_M_state_store,
943                         _M_state_store.
944                         _M_insert_subexpr_begin(_Start(__mark)));
945           this->_M_disjunction();
946           if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
947             __throw_regex_error(regex_constants::error_paren);
948           if (!_M_stack.empty())
949             {
950               __r._M_append(_M_stack.top());
951               _M_stack.pop();
952             }
953           __r._M_append(_M_state_store._M_insert_subexpr_end
954                         (__mark, _End(__mark)));
955           _M_stack.push(__r);
956           return true;
957         }
958       return _M_bracket_expression();
959     }
960
961   template<typename _InIter, typename _TraitsT>
962     bool
963     _Compiler<_InIter, _TraitsT>::
964     _M_bracket_expression()
965     {
966       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
967         {
968           _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
969                                _M_traits);
970           if (!_M_bracket_list(__matcher)
971               || !_M_match_token(_ScannerT::_S_token_bracket_end))
972             __throw_regex_error(regex_constants::error_brack);
973           _M_stack.push(_StateSeq(_M_state_store,
974                                   _M_state_store._M_insert_matcher(__matcher)));
975           return true;
976         }
977       return false;
978     }
979
980   // If the dash is the last character in the bracket expression, it is not
981   // special.
982   template<typename _InIter, typename _TraitsT>
983     bool
984     _Compiler<_InIter, _TraitsT>::
985     _M_bracket_list(_RMatcherT& __matcher)
986     {
987       if (_M_follow_list(__matcher))
988         {
989           if (_M_match_token(_ScannerT::_S_token_dash))
990             __matcher._M_add_char(_M_cur_value[0]);
991           return true;
992         }
993       return false;
994     }
995
996   template<typename _InIter, typename _TraitsT>
997     bool
998     _Compiler<_InIter, _TraitsT>::
999     _M_follow_list(_RMatcherT& __matcher)
1000     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1001
1002   template<typename _InIter, typename _TraitsT>
1003     bool
1004     _Compiler<_InIter, _TraitsT>::
1005     _M_follow_list2(_RMatcherT& __matcher)
1006     {
1007       if (_M_expression_term(__matcher))
1008         return _M_follow_list2(__matcher);
1009       return true;
1010     }
1011
1012   template<typename _InIter, typename _TraitsT>
1013     bool
1014     _Compiler<_InIter, _TraitsT>::
1015     _M_expression_term(_RMatcherT& __matcher)
1016     {
1017       return (_M_collating_symbol(__matcher)
1018               || _M_character_class(__matcher)
1019               || _M_equivalence_class(__matcher)
1020               || (_M_start_range(__matcher)
1021                   && _M_range_expression(__matcher)));
1022     }
1023
1024   template<typename _InIter, typename _TraitsT>
1025     bool
1026     _Compiler<_InIter, _TraitsT>::
1027     _M_range_expression(_RMatcherT& __matcher)
1028     {
1029       if (!_M_collating_symbol(__matcher))
1030         if (!_M_match_token(_ScannerT::_S_token_dash))
1031           __throw_regex_error(regex_constants::error_range);
1032       __matcher._M_make_range();
1033       return true;
1034     }
1035
1036   template<typename _InIter, typename _TraitsT>
1037     bool
1038     _Compiler<_InIter, _TraitsT>::
1039     _M_start_range(_RMatcherT& __matcher)
1040     { return _M_match_token(_ScannerT::_S_token_dash); }
1041
1042   template<typename _InIter, typename _TraitsT>
1043     bool
1044     _Compiler<_InIter, _TraitsT>::
1045     _M_collating_symbol(_RMatcherT& __matcher)
1046     {
1047       if (_M_match_token(_ScannerT::_S_token_collelem_single))
1048         {
1049           __matcher._M_add_char(_M_cur_value[0]);
1050           return true;
1051         }
1052       if (_M_match_token(_ScannerT::_S_token_collsymbol))
1053         {
1054           __matcher._M_add_collating_element(_M_cur_value);
1055           return true;
1056         }
1057       return false;
1058     }
1059
1060   template<typename _InIter, typename _TraitsT>
1061     bool
1062     _Compiler<_InIter, _TraitsT>::
1063     _M_equivalence_class(_RMatcherT& __matcher)
1064     {
1065       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1066         {
1067           __matcher._M_add_equivalence_class(_M_cur_value);
1068           return true;
1069         }
1070       return false;
1071     }
1072
1073   template<typename _InIter, typename _TraitsT>
1074     bool
1075     _Compiler<_InIter, _TraitsT>::
1076     _M_character_class(_RMatcherT& __matcher)
1077     {
1078       if (_M_match_token(_ScannerT::_S_token_char_class_name))
1079         {
1080           __matcher._M_add_character_class(_M_cur_value);
1081           return true;
1082         }
1083       return false;
1084     }
1085
1086   template<typename _InIter, typename _TraitsT>
1087     int
1088     _Compiler<_InIter, _TraitsT>::
1089     _M_cur_int_value(int __radix)
1090     {
1091       int __v = 0;
1092       for (typename _StringT::size_type __i = 0;
1093            __i < _M_cur_value.length(); ++__i)
1094         __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1095       return __v;
1096     }
1097
1098   template<typename _InIter, typename _TraitsT>
1099     _AutomatonPtr
1100     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1101               regex_constants::syntax_option_type __f)
1102     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1103                                         __f)._M_nfa())); }
1104
1105 _GLIBCXX_END_NAMESPACE_VERSION
1106 } // namespace __regex
1107 } // namespace std
1108
1109 /* vim: set ts=8 sw=2 sts=2: */