-- 数値の解析を行なうためのnumber_lexerを新設。

author derui <derutakayu@user.sourceforge.jp>

Sat, 27 Jun 2009 00:53:19 +0000 (09:53 +0900)

committer derui <derutakayu@user.sourceforge.jp>

Sat, 27 Jun 2009 00:53:19 +0000 (09:53 +0900)
author derui <derutakayu@user.sourceforge.jp>
Sat, 27 Jun 2009 00:53:19 +0000 (09:53 +0900)
committer derui <derutakayu@user.sourceforge.jp>
Sat, 27 Jun 2009 00:53:19 +0000 (09:53 +0900)
diff --git a/number_lexer.h b/number_lexer.h

new file mode 100644 (file)

index 0000000..153717d
--- /dev/null
+++ b/number_lexer.h
@@ -0,0 +1,270 @@
+#ifndef _NUMBER_LEXER_H_
+#define _NUMBER_LEXER_H_
+
+#include <sstream>
+#include "utf8_string.h"
+#include "smart_ptr.h"
+#include "utf8.h"
+#include "lexeme.h"
+#include "sublexer.h"
+
+
+namespace utakata {
+
+    namespace sublexer {
+    
+        /////////////////////////////////////////////////////////////////
+        // <number>がそれぞれのradixに基づいて解析する必要があるため、 //
+        // それぞれに対して特化できるようなテンプレートを用意する。    //
+        /////////////////////////////////////////////////////////////////
+
+        template<class T, int num>
+        class NumberLexerTemp
+        {
+
+            const int INIT;
+            const int REAL;
+        public:
+            NumberLexerTemp() : checker_(), exact_(false) {}
+            virtual ~NumberLexerTemp() {}
+
+            smart_ptr<lexeme::ILexeme> lex(smart_ptr<utf8::UTF8InputStream> stream,
+                                           smart_ptr<ISubLexer>& next,
+                                           bool exactness)
+                {
+                    // 全体的な流れは同一であり、基数の違いによる
+                    // 数値の違い程度しか問題は発生しない。
+                    exact_ = exactness;
+                    lexer_delimiter::Normal nor;
+                    utf8_string::UTF8String number;
+                    while (true) {
+                        // デリミタまで読んでしまう。
+                        if (stream->isEOF())
+                        {
+                            throw LexException("illegal number format");
+                        }
+                        
+                        utf8_string::UTF8Char ch(stream->read());
+                        if (nor(ch))
+                        {
+                            stream->unget(ch.getBytes());
+                            break;
+                        }
+
+                        number += ch;
+                    }
+
+                    // 読出したnumberを解析してNumberオブジェクトとして返す。
+                    return lexNumber_(number);
+                }
+
+        private:
+
+            smart_ptr<lexeme::ILexeme> lexNumber_(const utf8_string::UTF8String& number)
+                {
+                    // デリミタまでを取得したデータを、実際の数値オブジェクトとして解釈させる。
+                    // 解釈できなかった場合、字句構文エラーとして扱う。
+                    utf8_string::UTF8String::const_utf8iterator it = number.begin(),
+                        end = number.end();
+
+                    int status = INIT, prevstatus = INIT;
+                    
+                    while (begin != end) {
+
+                        switch (status) {
+                        case INIT:
+                        {
+                            // 非終端記号で言えば<complex>に当たる。
+                            // 全ての整数は原則として +-0iの虚数と等しいため、そのようになる。
+                            
+                            // 一文字単位で調べていく。checker_に該当するデータの場合には、
+                            prevstatus = status;
+                            status = complex_(begin, end);
+                        }
+                        break;
+                        case FLAG:
+                        {
+                            smart_ptr<utf8_string::UTF8String> s = real_(begin, end);
+                            
+                            return lexeme::makeNumber(s, exact_);
+                        }
+                        break;
+                        // 各自確定するときはそのまま出力するようにしても構わない。
+                        case NAN_IMAGINARY: return lexeme::makeNanImaginary(exact_);
+                        case INF_IMAGINARY: return lexeme::makeInfImaginary(exact_);
+                        case IMAGINARY_ONLY: return lexeme::makeImaginaruOnly(exact_);
+                        case INFINITY: return lexeme::makeInfinity(exact_);
+                            
+                        }
+                    }
+
+                }
+
+            // 初期状態で確定可能なリテラルをチェックする。
+            int complex_(utf8_string::UTF8String::const_utf8iterator& begin,
+                         const utf8_string::UTF8String::const_utf8iterator& end)
+                {
+                    // 一文字単位で調べていく。checker_に該当するデータの場合には、
+                    utf8_string::UTF8String s;
+                    s.insert(s.begin(), begin, end);
+                    if (s.toStr() == "-nan.0i" || s.toStr() == "+nan.0i")
+                    {
+                        // 虚数部が正/負の非数として登録する。
+                        return NAN_IMAGINARY;
+                    }
+                    else if (s.toStr() == "-inf.0i" || s.toStr() == "+inf.0i")
+                    {
+                        // 虚数部が正/負の無限大として処理する。
+                        return INF_IMAGINARY;
+                    }
+                    else if (s.toStr() == "-i" || s.toStr() == "+i")
+                    {
+                        // 0+1i,0-1iにそれぞれマッピングされる。
+                        return IMAGINARY_ONLY;
+                    }
+                    else if (s.toStr() == "-inf.0" || s.toStr() == "+inf.0")
+                    {
+                        // 正負の無限大にそれぞれマッピングされる。
+                        return INFINITY;
+                    }
+                    else if (s.toStr() == "-nan.0" || s.toStr() == "+nan.0")
+                    {
+                        // 正負の非数にそれぞれマッピングされる。
+                        return NAN;
+                    }
+
+                    if (begin->toStr() == '+' || begin->toStr() == '-')
+                    {
+                        // 先頭が符号の場合、次に来るべきはurealかnan.0、inf.0
+                        return FLAG;
+                    }
+
+                    // 上記以外の場合には、それはエラーであるため返す。
+                    throw LexException("number lex error : parse real");
+                }
+
+
+            smart_ptr<utf8_string::UTF8String> real_(utf8_string::UTF8String::const_utf8iterator& begin,
+                                                     const utf8_string::UTF8String::const_utf8iterator& end)
+                {
+                    // 実数の表現でなければならない。
+                    // 先頭一文字が符号表現か、符号無し表現で
+                    // あること。
+                    if (begin->toUTF16Code() == '+' || begin->toUTF16Code() == '-')
+                    {
+                        // 符号表現である場合には、以降の符号
+                        // 表現までを調べる。
+                        ++begin;
+                        utf8_string::UTF8String s;
+                        utf8_string::const_utf8iterator save = begin;
+                        while (begin != end && (begin->toUTF16Code() != '+' ||
+                                                begin->toUTF16Code() != '-')) {
+                            s += *begin++;
+                        }
+                        // とりあえず抜けた段階で、実際に抽出
+                        // してみたデータを調べてみる。
+                        if (s.toStr() == 'nan.0' || s.toStr() == 'inf.0')
+                        {
+                            // 非数/無限大として出力する。
+                            return smart_ptr<utf8_string::UTF8String>(
+                                utf8_string::UTF8String(s));
+                        }
+                        else
+                        {
+                            // <uinteger>,<uinteger>/<uinteger>,<decimal 10>のいずれか
+                            // である必要がある。
+                            begin = save;
+
+                            if (NUM != 10)
+                            {
+                                smart_ptr<utf8_string::UTF8String> s = uinteger_(begin, end);
+                                if (begin != end && begin->toUTF16Code() == '/')
+                                {
+                                    *s += *begin++;
+                                    *s += *(uinteger_(begin, end));
+                                }
+
+                                return s;
+                            }
+                        }
+                    }
+                    else
+                    {
+                        // この場合、実数表現でなければならない。
+                        while (begin != end && (begin->toUTF16Code() != '+' ||
+                                                begin->toUTF16Code() != '-')) {
+                            s += *begin++;
+                        }
+                        // とりあえず抜けた段階で、実際に抽出
+                        // してみたデータを調べてみる。
+                        if (s.toStr() == 'nan.0' || s.toStr() == 'inf.0')
+                        {
+                            // 非数/無限大として出力する。
+                            return smart_ptr<utf8_string::UTF8String>(
+                                utf8_string::UTF8String(s));
+                        }
+                        else
+                        {
+                            // <uinteger>,<uinteger>/<uinteger>,<decimal 10>のいずれか
+                            // である必要がある。
+                            begin = save;
+
+                            if (NUM != 10)
+                            {
+                                smart_ptr<utf8_string::UTF8String> s = uinteger_(begin, end);
+                                if (begin != end && begin->toUTF16Code() == '/')
+                                {
+                                    *s += *begin++;
+                                    *s += *(uinteger_(begin, end));
+                                }
+
+                                return s;
+                            }
+                        }
+                    }
+                }
+
+            smart_ptr<utf8_string::UTF8String> uinteger_(utf8_string::UTF8String::const_utf8iterator& begin,
+                                                         const utf8_string::UTF8String::const_utf8iterator& end)
+                {
+                    // 基数が10進数ではない場合、利用されている数値が
+                    // checker_オブジェクトが該当するデータであるか
+                    // を調べるだけでよい。
+                    // 余計な文字が入っている場合には、字句構文違反になる。
+                    smart_ptr<utf8_string::UTF8String> s(new utf8_string::UTF8String);
+                    while (begin != end)
+                    {
+                        if (begin->toUTF16Code() == '.')
+                        {
+                            throw LexException("can't contained '.' at radix with the exception of 'decimal'");
+                        }
+                        else if (begin->toUTF16Code() == '/')
+                        {
+                            throw LexException("necessary an integer before of '/' token");
+                        }
+                        else if (checker_(*begin))
+                        {
+                            *s += *begin++;
+                        }
+                        else
+                        {
+                            std::stringstream ss;
+                            ss << begin->toUTF16Code() - '0' << " is not contained of '" << NUM;
+                            throw LexException(ss.str());
+                        }
+                    }
+
+                    return s;
+                }
+
+            // チェックを行うための関数オブジェクト。
+            T checker_;
+            bool exact_;
+        };
+
+    
+    };
+
+};
+
+#endif /* _NUMBER_LEXER_H_ */
diff --git a/sublexer_impl.cpp b/sublexer_impl.cpp

index 19fbf26..231e845 100644 (file)
--- a/sublexer_impl.cpp
+++ b/sublexer_impl.cpp
@@ -200,10 +200,6 @@ smart_ptr<lexeme::ILexeme> sublexer::FirstLexer::lex_(const utakata::utf8_string
  smart_ptr<lexeme::ILexeme> sublexer::StringLexer::lex(smart_ptr<utf8::UTF8InputStream> stream,
                                                        smart_ptr<sublexer::ISubLexer>& next)
  {
-<<<<<<< HEAD:sublexer_impl.cpp
-    // 文字列を解釈して設定する。文字列は、基本的には""で囲まれている限り、
-    // 改行まで全部含む。
-=======
      // 文字列を解析する。
  
      lexer_delimiter::Normal nor;
@@ -226,7 +222,6 @@ smart_ptr<lexeme::ILexeme> sublexer::StringLexer::lex(smart_ptr<utf8::UTF8InputS
              str += stream->read();
          }
      }
->>>>>>> 8699c6459d3f350a28feb1be42820a97a6e15e93:sublexer_impl.cpp
  
      return smart_ptr<lexeme::ILexeme>();
  }
diff --git a/sublexer_impl.h b/sublexer_impl.h

index df59d8b..74fd163 100644 (file)
--- a/sublexer_impl.h
+++ b/sublexer_impl.h
@@ -161,79 +161,6 @@ namespace utakata {
              smart_ptr<utakata::utf8_string::UTF8String> str_;
          };
  
-        /////////////////////////////////////////////////////////////////
-        // <number>がそれぞれのradixに基づいて解析する必要があるため、 //
-        // それぞれに対して特化できるようなテンプレートを用意する。    //
-        /////////////////////////////////////////////////////////////////
-
-        template<class T>
-        class NumberLexerTemp
-        {
-        public:
-            NumberLexerTemp() : checker_() {}
-            virtual ~NumberLexerTemp() {}
-
-            smart_ptr<lexeme::ILexeme> lex(smart_ptr<utakata::utf8::UTF8InputStream> stream,
-                                                    smart_ptr<ISubLexer>& next,
-                                                    bool exactness)
-                {
-                    // 全体的な流れは同一であり、基数の違いによる
-                    // 数値の違い程度しか問題は発生しない。
-                    utakata::lexer_delimiter::Normal nor;
-                    utakata::utf8_string::UTF8String number;
-                    while (true) {
-                        // デリミタまで読んでしまう。
-                        if (stream->isEOF())
-                        {
-                            throw LexException("illegal number format");
-                        }
-                        
-                        utakata::utf8_string::UTF8Char ch(stream->read());
-                        if (nor(ch))
-                        {
-                            stream->unget(ch.getBytes());
-                            break;
-                        }
-
-                        number += ch;
-                    }
-
-                    // 読出したnumberを解析してNumberオブジェクトとして返す。
-                    return lexNumber_(number);
-                }
-
-        private:
-
-            smart_ptr<lexeme::ILexeme> lexNumber_(const utakata::utf8_string::UTF8String& number)
-                {
-                    // デリミタまでを取得したデータを、実際の数値オブジェクトとして解釈させる。
-                    // 解釈できなかった場合、字句構文エラーとして扱う。
-                    utakata::utf8_string::UTF8String::const_utf8iterator it = number.begin(),
-                        end = number.end();
-
-                    int status = INIT;
-                    while (begin != end) {
-
-                        switch (status)
-                        {
-                        case INIT:
-                        {
-                            // 非終端記号で言えば<complex>に当たる。
-                            // 全ての整数は原則として +-0iの虚数と等しいため、そのようになる。
-                            
-                        // 一文字単位で調べていく。checker_に該当するデータの場合には、
-                        if (begin->toUTF16Code() == '+' ||
-                            begin->toUTF16Code() == '-')
-                        {
-                            // 符号である場合。
-                        }
-                        
-                    
-                }
-
-            // チェックを行うための関数オブジェクト。
-            T checker_;
-        };
      };
  
  };
author	derui <derutakayu@user.sourceforge.jp>
	Sat, 27 Jun 2009 00:53:19 +0000 (09:53 +0900)
committer	derui <derutakayu@user.sourceforge.jp>
	Sat, 27 Jun 2009 00:53:19 +0000 (09:53 +0900)
number_lexer.h	[new file with mode: 0644]	patch \| blob
sublexer_impl.cpp		patch \| blob \| history
sublexer_impl.h		patch \| blob \| history