parser.cpp

   1 #include <sstream>
   2
   3 #include "parser.h"
   4
   5 #include "lexeme.h"
   6 #include "lexeme_id.h"
   7
   8 using namespace utakata;
   9
  10
  11 parser::DatumException::DatumException(std::string str) : str_()
  12 {
  13     // エラーメッセージを定義する。
  14     std::stringstream ss;
  15     ss << "datum error ! -- message : [" << str << "]" << std::endl;
  16     str_ = ss.str();
  17 }
  18
  19 const char* parser::DatumException::what() const throw()
  20 {
  21     return str_.c_str();
  22 }
  23
  24 ////////////////////////////
  25 // Parser Implementations //
  26 ////////////////////////////
  27
  28 parser::Parser::Parser(const smart_ptr<lexer::Lexer>& l) : lexer_(l)
  29 {
  30 }
  31
  32 bool parser::Parser::parse(smart_ptr<utf8::UTF8InputStream>& strm)
  33 {
  34     PARSERSTATUS status = PS_INIT;
  35     smart_ptr<lexeme::ILexeme> lexm;
  36     // lexemeがNullである状態になるまで進む。
  37     int list_count = 1;
  38
  39     while (!(lexm = lexer_->lex(strm)).isNull()) {
  40         if (lexm.isNull())
  41         {
  42             break;
  43         }
  44
  45         // lexemeが返ってきたら、構文定義に該当するかどうかを調べる。
  46         // schemeの構文定義はシンプルかつ例外のないものになっているため、
  47         // 各定義とするのは簡単である。
  48         if (status == PS_INIT)
  49         {
  50             if (lexm->getID() == lexeme::LexemeID::openParenthesis)
  51             {
  52                 // 開き括弧である場合
  53                 status = PS_LIST_BEGIN;
  54                 ++list_count;
  55             }
  56             else if (lexm->getID() == lexeme::LexemeID::string ||
  57                      lexm->getID() == lexeme::LexemeID::number ||
  58                      lexm->getID() == lexeme::LexemeID::identifier ||
  59                      lexm->getID() == lexeme::LexemeID::charactor ||
  60                      lexm->getID() == lexeme::LexemeID::boolean)
  61             {
  62                 // それぞれの場合、lexeme_datumとして扱われる。
  63                 status = PS_LEXEME_DATUM;
  64             }
  65             else if (lexm->getID() == lexeme::LexemeID::byteVector)
  66             {
  67                 // バイトベクタを開始する。
  68                 status = PS_BYTEVECTOR_START;
  69             }
  70             else if (lexm->getID() == lexeme::LexemeID::vector)
  71             {
  72                 // ベクタを開始する。
  73                 status = PS_VECTOR_START;
  74             }
  75             else if (isAbbrev(lexm))
  76             {
  77                 // abbreviationを判別してstatusを返す。
  78                 status = guessAbbrev(lexm);
  79             }
  80             else
  81             {
  82                 throw DatumException("datumの開始記号ではありません");
  83             }
  84         }
  85         else if (status == PS_LIST_BEGIN)
  86         {
  87             // リストの開始記号である場合、PS_LIST_END以外は原則として
  88             // 全て取りこむ必要がある。
  89             if (lexm->getID() == lexeme::LexemeID::closeParenthesis)
  90             {
  91                 status = PS_INIT;
  92
  93             }
  94         }
  95     }
  96     return true;
  97 }
  98
  99 bool parser::Parser::isAbbrev(const smart_ptr<lexeme::ILexeme>& l)
 100 {
 101     switch (l->getID().toEnum())
 102     {
 103     case lexeme::LexemeID::BACKQUOTE:        return true;
 104     case lexeme::LexemeID::QUOTE:            return true;
 105     case lexeme::LexemeID::UNQUOTE:          return true;
 106     case lexeme::LexemeID::UNQUOTESPLICING:  return true;
 107     case lexeme::LexemeID::SYNTAX:           return true;
 108     case lexeme::LexemeID::QUASISYNTAX:      return true;
 109     case lexeme::LexemeID::UNSYNTAX:         return true;
 110     case lexeme::LexemeID::UNSYNTAXSPLICING: return true;
 111     default:
 112         return false;
 113     }
 114 }
 115
 116 parser::Parser::PARSERSTATUS parser::Parser::guessAbbrev(const smart_ptr<lexeme::ILexeme>& l)
 117 {
 118     switch (l->getID().toEnum())
 119     {
 120     case lexeme::LexemeID::BACKQUOTE:        return PS_LIST_ABB_QQUOTE;
 121     case lexeme::LexemeID::QUOTE:            return PS_LIST_ABB_QUOTE;
 122     case lexeme::LexemeID::UNQUOTE:          return PS_LIST_ABB_UQUOTE;
 123     case lexeme::LexemeID::UNQUOTESPLICING:  return PS_LIST_ABB_UQUOTE_SP;
 124     case lexeme::LexemeID::SYNTAX:           return PS_LIST_ABB_SYNTAX;
 125     case lexeme::LexemeID::QUASISYNTAX:      return PS_LIST_ABB_QSYNTAX;
 126     case lexeme::LexemeID::UNSYNTAX:         return PS_LIST_ABB_USYNTAX;
 127     case lexeme::LexemeID::UNSYNTAXSPLICING: return PS_LIST_ABB_USYNTAX_SP;
 128     default:
 129         return PS_INIT;
 130     }
 131 }
 132