lexeme.h

   1 #ifndef _LEXEME_H_
   2 #define _LEXEME_H_
   3
   4 #include "smart_ptr.h"
   5
   6 namespace utakata {
   7
   8     namespace lexeme {
   9
  10         class utakata::utf8_string::CUTF8InputStream;
  11         class utakata::utf8_string::CUTF8String;
  12         class CLexeme;
  13
        // Defines the terminal symbols used in the lexer namespace and
        // the function objects that parse each of them.
  16
  17         struct CIdentityLexer
  18         {
  19             /*
  20               <identifier>を解釈するためのfunctor。
  21               <identifier>は次のBNF構文で定義される。
  22
  23               <identifier> - <initial> <subsequent>*
  24                            | <peculiar identifier>
  25              */
  26             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  27         };
  28
  29         struct CInitialLexer
  30         {
  31             /*
  32               <initial>を解釈するためのfunctor。
  33               <initial>は次のBNF構文で定義される。
  34
  35               <initial> - <constituent> | <special initial>
  36                         | <inline hex escape>
  37             */
  38             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  39         };
  40
  41         struct CConstituentLexer
  42         {
  43             /*
  44               <constituent>を解釈するためのfunctor。
  45               <constituent>は次のBNF構文で定義される。
  46
  47               <constituent> - <letter>
  48               | 〈any character whose Unicode scalar value is greater than
  49                   127, and whose category is Lu, Ll, Lt, Lm, Lo, Mn,
  50                   Nl, No, Pd, Pc, Po, Sc, Sm, Sk, So, or Co〉
  51             */
  52             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  53         };
  54
  55         struct CLetterlexer
  56         {
  57             /*
  58               <letter>を解釈するためのfunctor。
  59               <letter>は次のBNF構文で定義される。
  60
  61               <letter> - a | b | c | ... | z
  62                        | A | B | C | ... | Z
  63             */
  64             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  65         };
  66
  67         struct CSpecialInitialLexer
  68         {
  69             /*
  70               <special initial>を解釈するためのfunctor。
  71               <special initial>は次のBNF構文で定義される。
  72               <special initial> - ! | $ | % | & | * | / | : | < | =
  73                                 | > | ? | ^ | _ | ~
  74
  75             */
  76             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  77         };
  78
  79         struct CDigitlexer
  80         {
  81             /*
  82               <digit>を解釈するためのfunctor。
  83               <digit>は次のBNF構文で定義される。
  84               <digit> - 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
  85             */
  86             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  87         };
  88
  89         struct CHexDigitLexer
  90         {
  91             /*
  92               <hex digit>を解釈するためのfunctor。
  93               <hex digit>は次のBNF構文で定義される。
  94               <hex digit> - <digit>
  95                           | a | A | b | B | c | C | d | D | e | E | f
  96                           | F
  97             */
  98             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
  99         };
 100
 101         struct CSpecialSubsequentLexer
 102         {
 103             /*
 104               <special subsequent>を解釈するためのfunctor。
 105               <special subsequent>は次のBNF構文で定義される。
 106               <special subsequent> → + | - | . | @
 107             */
 108             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 109         };
 110
 111         struct CInlineHexEscapeLexer
 112         {
 113             /*
 114               <inline hex escape>を解釈するためのfunctor
 115               <inline hex escape>は次のBNF構文で定義される。
 116               <inline hex escape> - \x<hex scalar value>;
 117             */
 118             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 119         };
 120
 121         struct CHexScalarValueLexer
 122         {
 123             //<hex scalar value>を解釈するためのfunctor。
 124             //<hex scalar value>は次のBNF構文で定義される。
 125             //<hex scalar value> - <hex digit>+
 126             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 127         };
 128
 129         struct CBooleanLexer
 130         {
 131             // <boolean>を解釈するためのfunctor。
 132             // <boolean>は次のBNF構文で定義される。
 133             // <boolean> - #t | #T | #f | #F
 134             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 135         };
 136
 137         struct CCharactorLexer
 138         {
 139             // <charactor>を解釈するためのfunctor。
 140             // <charactor>は次のBNF構文で定義される。
 141             // <character> - #\<any character>
 142             //                 | #\<character name>
 143             //                 | #\x<hex scalar value>
 144             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 145         };
 146
 147         struct CCharactorNameLexer
 148         {
 149 //             <character name>を解釈するためのfunctor。
 150 //             <charactor name>は次のBNF構文で定義される。
 151 //             <character name> - nul | alarm | backspace | tab
 152 //             | linefeed | newline | vtab | page | return
 153 //             | esc | space | delete
 154
 155             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 156         };
 157
 158         struct CStringlexer
 159         {
 160 //             <string>を解釈するためのfunctor。
 161 //             <string>は次のBNF構文で定義される。
 162 //             <string> - " <string element>* "
 163             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 164         };
 165
 166         struct CStringElementLexer
 167         {
 168 //             <string element>を解釈するためのfunctor。
 169 //             <string element>は次のBNF構文で定義される。
 170 //             <string element> - <any character other than " or \>
 171 //          | \a | \b | \t | \n | \v | \f | \r
 172 //          | \" | \\
 173 //          | \<intraline whitespace>*<line ending>
 174 //             <intraline whitespace>*
 175 //          | <inline hex escape>
 176             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 177         };
 178
 179         struct CIntralineWhitespaceLexer
 180         {
 181 //             <intraline whitespace>を解釈するためのfunctor。
 182 //             <intraline whitespace>は次のBNF構文で定義される。
 183 //             <intraline whitespace> - <character tabulation>
 184 //             | <any character whose category is Zs>
 185             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 186         };
 187
 188         struct CDelimiterlexer
 189         {
 190 //             <delimiter>を解釈するためのfunctor。
 191 //             <delimiter>は次のBNF構文で定義される。
 192 //             <delimiter> - ( | ) | [ | ] | " | ; | #
 193 //          | <whitespace>
 194             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 195         };
 196
 197         struct CWhitespaceLexer
 198         {
 199 //             <whitespace>を解釈するためのfunctor。
 200 //             <whitespace>は次のBNF構文で定義される。
 201 //             <whitespace> - <character tabulation>
 202 //             | <linefeed> | <line tabulation> | <form feed>
 203 //             | <carriage return> | <next line>
 204 //             | <any character whose category is Zs, Zl, or Zp>
 205             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 206         };
 207
 208         struct CLineEndingLexer
 209         {
 210 //             <line ending>を解釈するためのfunctor。
 211 //             <line ending>は次のBNF構文で定義される。
 212 //             <line ending> - <linefeed> | <carriage return>
 213 //             | <carriage return> <linefeed> | <next line>
 214 //             | <carriage return> <next line> | <line separator>
 215             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 216         };
 217
 218         struct CCommentLexer
 219         {
 220 //             <comment>を解釈するためのfunctor。
 221 //             <comment>は次のBNF構文で定義される。
 222 //             <comment> - ; 〈all subsequent characters up to a
 223 //             <line ending> or <paragraph separator>〉
 224 //             | <nested comment>
 225 //             | #; <interlexeme space> <datum>
 226 //             | #!r6rs
 227             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 228         };
 229
 230         struct CNestedCommentLexer
 231         {
 232 //             <nested comment>を解釈するためのfunctor。
 233 //             <nested comment>は次のBNF構文で定義される。
 234 //             <nested comment> - #| <comment text>
 235 //             <comment cont>* |#
 236             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 237         };
 238
 239         struct CCommentTextLexer
 240         {
 241 //             <comment text>を解釈するためのfunctor。
 242 //             <comment text>は次のBNF構文で定義される。
 243 //             <comment text> - 〈character sequence not containing
 244 //             #| or |#〉
 245             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 246         };
 247
 248         struct CCommentContLexer
 249         {
 250 //             <comment cont>を解釈するためのfunctor。
 251 //             <comment cont>は次のBNF構文で定義される。
 252 //             <comment cont> - <nested comment> <comment text>
 253             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 254         };
 255
 256
 257         struct CNumberLexer
 258         {
 259 //             <number>を解釈するためのfunctor。
 260 //             <number>は次のBNF構文で定義される。
 261 //             <number> - <num 2> | <num 8>
 262 //             | <num 10> | <num 16>
 263             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 264         };
 265
 266         struct CNum2Lexer
 267         {
 268 //             <num 2>を解釈するためのfunctor。
 269 //             <num 2>は次のBNF構文で定義される。
 270 //             <num R> - <prefix R> <complex R>
 271 //             <complex 2> - <real 2> | <real 2> @ <real 2>
 272 //             | <real 2> + <ureal 2> i | <real 2> - <ureal 2> i
 273 //             | <real 2> + <naninf> i | <real 2> - <naninf> i
 274 //             | <real 2> + i | <real 2> - i
 275 //             | + <ureal 2> i | - <ureal 2> i
 276 //             | + <naninf> i | - <naninf> i
 277 //             | + i | - i
 278 //             <real 2> - <sign> <ureal 2>
 279 //             | + <naninf> | - <naninf>
 280 //             <naninf> - nan.0 | inf.0
 281 //             <ureal 2> - <uinteger 2>
 282 //             | <uinteger 2> / <uinteger 2>
 283 //             | <decimal 2> <mantissa width>
 284 //             <decimal 10> - <uinteger 10> <suffix>
 285 //             | . <digit 10>+ <suffix>
 286 //             | <digit 10>+ . <digit 10>* <suffix>
 287 //             <uinteger 2> - <digit 2>+
 288 //             <prefix 2> - <radix 2> <exactness>
 289 //             | <exactness> <radix 2>
 290
 291 //             <suffix> - <empty>
 292 //             | <exponent marker> <sign> <digit 10>+
 293 //             <exponent marker> - e | E | s | S | f | F
 294 //             | d | D | l | L
 295 //             <mantissa width> - <empty>
 296 //             | | <digit 10>+
 297 //             <sign> - <empty> | + | -
 298 //             <exactness> - <empty>
 299 //             | #i| #I | #e| #E
 300 //             <radix 2> - #b| #B
 301 //             <digit 2> - 0 | 1
 302             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 303         };
 304
 305         struct CNum8Lexer
 306         {
 307 //             <num 8>を解釈するためのfunctor。
 308 //             <num 8>は次のBNF構文で定義される。
 309 //             <num R> - <prefix R> <complex R>
 310 //             <complex 8> - <real 8> | <real 8> @ <real 8>
 311 //             | <real 8> + <ureal 8> i | <real 8> - <ureal 8> i
 312 //             | <real 8> + <naninf> i | <real 8> - <naninf> i
 313 //             | <real 8> + i | <real 8> - i
 314 //             | + <ureal 8> i | - <ureal 8> i
 315 //             | + <naninf> i | - <naninf> i
 316 //             | + i | - i
 317 //             <real 8> - <sign> <ureal 8>
 318 //             | + <naninf> | - <naninf>
 319 //             <naninf> - nan.0 | inf.0
 320 //             <ureal 8> - <uinteger 8>
 321 //             | <uinteger 8> / <uinteger 8>
 322 //             | <decimal 8> <mantissa width>
 323 //             <decimal 10> - <uinteger 10> <suffix>
 324 //             | . <digit 10>+ <suffix>
 325 //             | <digit 10>+ . <digit 10>* <suffix>
 326 //             <uinteger 8> - <digit 8>+
 327 //             <prefix 8> - <radix 8> <exactness>
 328 //             | <exactness> <radix 8>
 329
 330 //             <suffix> - <empty>
 331 //             | <exponent marker> <sign> <digit 10>+
 332 //             <exponent marker> - e | E | s | S | f | F
 333 //             | d | D | l | L
 334 //             <mantissa width> - <empty>
 335 //             | | <digit 10>+
 336 //             <sign> - <empty> | + | -
 337 //             <exactness> - <empty>
 338 //             | #i| #I | #e| #E
 339 //             <radix 8> - #b| #B
 340 //             <digit 8> - 0 | 1
 341             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 342         };
 343
 344         struct CNum10Lexer
 345         {
 346 //             <num 10>を解釈するためのfunctor。
 347 //             <num 10>は次のBNF構文で定義される。
 348 //             <num R> - <prefix R> <complex R>
 349 //             <complex 10> - <real 10> | <real 10> @ <real 10>
 350 //             | <real 10> + <ureal 10> i | <real 10> - <ureal 10> i
 351 //             | <real 10> + <naninf> i | <real 10> - <naninf> i
 352 //             | <real 10> + i | <real 10> - i
 353 //             | + <ureal 10> i | - <ureal 10> i
 354 //             | + <naninf> i | - <naninf> i
 355 //             | + i | - i
 356 //             <real 10> - <sign> <ureal 10>
 357 //             | + <naninf> | - <naninf>
 358 //             <naninf> - nan.0 | inf.0
 359 //             <ureal 10> - <uinteger 10>
 360 //             | <uinteger 10> / <uinteger 10>
 361 //             | <decimal 10> <mantissa width>
 362 //             <decimal 10> - <uinteger 10> <suffix>
 363 //             | . <digit 10>+ <suffix>
 364 //             | <digit 10>+ . <digit 10>* <suffix>
 365 //             <uinteger 10> - <digit 10>+
 366 //             <prefix 10> - <radix 10> <exactness>
 367 //             | <exactness> <radix 10>
 368
 369 //             <suffix> - <empty>
 370 //             | <exponent marker> <sign> <digit 10>+
 371 //             <exponent marker> - e | E | s | S | f | F
 372 //             | d | D | l | L
 373 //             <mantissa width> - <empty>
 374 //             | | <digit 10>+
 375 //             <sign> - <empty> | + | -
 376 //             <exactness> - <empty>
 377 //             | #i| #I | #e| #E
 378 //             <radix 10> - #b| #B
 379 //             <digit 10> - 0 | 1
 380             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 381         };
 382
 383         struct CNum16Lexer
 384         {
 385 //             <num 16>を解釈するためのfunctor。
 386 //             <num 16>は次のBNF構文で定義される。
 387 //             <num R> - <prefix R> <complex R>
 388 //             <complex 16> - <real 16> | <real 16> @ <real 16>
 389 //             | <real 16> + <ureal 16> i | <real 16> - <ureal 16> i
 390 //             | <real 16> + <naninf> i | <real 16> - <naninf> i
 391 //             | <real 16> + i | <real 16> - i
 392 //             | + <ureal 16> i | - <ureal 16> i
 393 //             | + <naninf> i | - <naninf> i
 394 //             | + i | - i
 395 //             <real 16> - <sign> <ureal 16>
 396 //             | + <naninf> | - <naninf>
 397 //             <naninf> - nan.0 | inf.0
 398 //             <ureal 16> - <uinteger 16>
 399 //             | <uinteger 16> / <uinteger 16>
 400 //             | <decimal 16> <mantissa width>
 401 //             <decimal 10> - <uinteger 10> <suffix>
 402 //             | . <digit 10>+ <suffix>
 403 //             | <digit 10>+ . <digit 10>* <suffix>
 404 //             <uinteger 16> - <digit 16>+
 405 //             <prefix 16> - <radix 16> <exactness>
 406 //             | <exactness> <radix 16>
 407
 408 //             <suffix> - <empty>
 409 //             | <exponent marker> <sign> <digit 10>+
 410 //             <exponent marker> - e | E | s | S | f | F
 411 //             | d | D | l | L
 412 //             <mantissa width> - <empty>
 413 //             | | <digit 10>+
 414 //             <sign> - <empty> | + | -
 415 //             <exactness> - <empty>
 416 //             | #i| #I | #e| #E
 417 //             <radix 16> - #b| #B
 418 //             <digit 16> - 0 | 1
 419             smart_ptr<CLexeme> operator()(smart_ptr<utakata::utf8_string::CUTF8InputStream>& stream);
 420         };
 421
 422         //======================================================================
 423
 424         class CLexeme
 425         {
 426             // 非終端記号、及び終端記号を表す。
 427             // とりあえず最もシンプルな形を取るよ。
 428         public:
 429             ILexeme(const utakata::utf8_string::CUTF8String& str, int id);
 430             virtual ~ILexeme(){}
 431
 432             // 終端記号、非終端記号のIDを取得する。
 433             int getID() const;
 434
 435             // そのものを表す文字列を返す。
 436             const utakata::utf8_string::CUTF8String& getString() const;
 437
 438         private:
 439
 440             // pimplイディオムを使用する。
 441             struct Impl;
 442             smart_ptr<Impl> pimpl_;
 443         };
 444
 445     };
 446
 447 };
 448
 449 #endif /* _LEXEME_H_ */