lexerに関係するソースをsrc/lexer配下に移動。

[simplecms/utakata.git] / src / lexer / charactor_lexer.h
diff --git a/src/lexer/charactor_lexer.h b/src/lexer/charactor_lexer.h

new file mode 100755 (executable)

index 0000000..5538ab2
--- /dev/null
+++ b/src/lexer/charactor_lexer.h
@@ -0,0 +1,116 @@
+// Scheme構文における文字を判定するためのLexerです。
+// ここで定義されるLexerは、IPartsOfLexerから派生しており、以下の条件
+// をもってLexerDispatcherからディスパッチします。
+// ：検索対象文字列の先頭 = #\ の場合：
+#ifndef _UTAKATA_SRC_LEXER_CHARACTOR_LEXER_H_
+#define _UTAKATA_SRC_LEXER_CHARACTOR_LEXER_H_
+
+#include <string>
+
+#include "src/lexer/lexer_interface.h"
+
+namespace utakata {
+// Forward declarations. This header only mentions these types through
+// references and pointers, so their complete definitions are not needed here.
+namespace unicode {
+class UniString;
+class UniChar;
+}  // namespace unicode
+
+namespace lexeme {
+class ILexeme;
+}  // namespace lexeme
+
+namespace reader {
+class EncodingReader;
+}  // namespace reader
+
+namespace lexer {
+// Dispatch condition for the character lexer.
+// According to the comment at the top of this file, the dispatcher hands a
+// string over to the character lexer when that string starts with "#\" .
+// NOTE(review): CharactorLexer::GetTerm() returns ILexerDispatchTerm*, but
+// this class does not derive from it in this header - confirm whether a base
+// class is missing.
+class CharactorDispatchTerm {
+ public:
+  CharactorDispatchTerm() {}
+  virtual ~CharactorDispatchTerm() {}
+
+  // Decides whether |string| should be dispatched.
+  // Returns true when dispatch should take place.
+  virtual bool IsDispatch(const unicode::UniString& string) const;
+};
+
+struct CharactorNames {
+  // charactor name の固定値です。それぞれ固有の値ですので、
+  // 定数として定義しています。
+  static const std::string kNUL       = "nul";
+  static const std::string kALARM     = "alarm";
+  static const std::string kBACKSPACE = "backspace";
+  static const std::string kTAB       = "tab";
+  static const std::string kLINEFEED  = "linefeed";
+  static const std::string kNEWLINE   = "newline";
+  static const std::string kVTAB      = "vtab";
+  static const std::string kPAGE      = "page";
+  static const std::string kRETURN    = "return";
+  static const std::string kESC       = "esc";
+  static const std::string kSPACE     = "space";
+  static const std::string kDELETE    = "delete";
+
+  // 渡されたstringがCharctor Nameであるかどうかを判別します。
+  // ここで許可されるCharactor Nameは次の通りです。
+  // それぞれは右に記載されているUnicode文字に変換されます。
+  // nul       U+0000
+  // alarm     U+0007
+  // backspace U+0008
+  // tab       U+0009
+  // linefeed  U+000A
+  // newline   U+000A
+  // vtab      U+000B
+  // page      U+000C
+  // return    U+000D
+  // esc       U+001B
+  // space     U+0020
+  // delete    U+007F
+  static bool IsCharactorName(const unicode::UniString& string);
+
+  // 渡されたunicodeの内容と一致するunicodeを返します。
+  // CharactorNameに該当しない場合、0x11ffffが返されます。
+  // 0x11ffffは、unicode:UniChar::kOutOfUnicodeとして定義されています。
+  static unsigned int GetCodeFromCharactorName(
+      const unicode::UniString& string);
+};
+
+class CharactorLexer : public IPartsOfLexer {
+  // Scheme構文における文字のチェックを行います。
+  // Scheme構文における文字リテラルは、以下のような形式となります。
+  //   #\<any charactor>
+  //   #\<charactor name>
+  //   #\x<hex scalar value>
+  // ただし、<any charactor>はかならず一文字の文字であり、デリミタか
+  // 入力の終端が続かない場合、charactor nameかhex xcalar value
+  // に該当しなければなりません。
+ public:
+
+  CharactorLexer() {}
+  virtual ~CharactorLexer() {}
+
+  // 読出しストリームへのポインタを受け取って、結果として生成した
+  // ILexemeインターフェースの派生クラスを返します。
+  virtual lexeme::ILexeme* Lex(const unicode::UniString& string,
+                               reader::EncodingReader* reader);
+
+  // このIPartOfLexerへとDispatchする条件を記載したDispatchTermを作成して
+  // 返します。返されたポインタは、取得側で削除する必要があります。
+  // 返された値の保持については、utility::scoped_ptrの利用を推奨します。
+  virtual ILexerDispatchTerm* GetTerm() const;
+
+ private:
+
+  // 与えられた文字列がCharactor Nameのいずれかと一致する場合、
+  // 変換後のUnicode文字を返します。
+  // IsCharactorNameがfalseとなるstringである場合、LexException例外
+  // が発生します。
+  void ConvertCharctorNameToCode(const unicode::UniString& string,
+                                 unicode::UniChar* code);
+
+  // 与えられたHex ValueをUnicode文字として解釈します。
+  // 与えられたHexValueが範囲外か、もしくはHexValueとして不正である場合、
+  // LexException例外が発生します。
+  void ConvertHexValueToCode(const unicode::UniString& string,
+                             unicode::UniChar* code);
+};
+}
+}
+
+#endif /* _UTAKATA_SRC_LEXER_CHARACTOR_LEXER_H_ */