OSDN Git Service

Commit DialogBox compile Okay
[tortoisegit/TortoiseGitJp.git] / ext / scintilla / src / LexCaml.cxx
diff --git a/ext/scintilla/src/LexCaml.cxx b/ext/scintilla/src/LexCaml.cxx
new file mode 100644 (file)
index 0000000..845ac65
--- /dev/null
@@ -0,0 +1,403 @@
+// Scintilla source code edit control\r
+/** @file LexCaml.cxx\r
+ ** Lexer for Objective Caml.\r
+ **/\r
+// Copyright 2005 by Robert Roessler <robertr@rftp.com>\r
+// The License.txt file describes the conditions under which this software may be distributed.\r
+/*     Release History\r
+       20050204 Initial release.\r
+       20050205 Quick compiler standards/"cleanliness" adjustment.\r
+       20050206 Added cast for IsLeadByte().\r
+       20050209 Changes to "external" build support.\r
+       20050306 Fix for 1st-char-in-doc "corner" case.\r
+       20050502 Fix for [harmless] one-past-the-end coloring.\r
+       20050515 Refined numeric token recognition logic.\r
+       20051125 Added 2nd "optional" keywords class.\r
+       20051129 Support "magic" (read-only) comments for RCaml.\r
+       20051204 Switch to using StyleContext infrastructure.\r
+*/\r
+\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <ctype.h>\r
+#include <stdio.h>\r
+#include <stdarg.h>\r
+\r
+#include "Platform.h"\r
+\r
+#include "PropSet.h"\r
+#include "Accessor.h"\r
+#include "StyleContext.h"\r
+#include "KeyWords.h"\r
+#include "Scintilla.h"\r
+#include "SciLexer.h"\r
+\r
+//     Portable stand-ins for the Microsoft __iscsym[f] funcs, which are not ANSI (NOTE(review): <ctype.h> calls need c to be EOF or an unsigned char value - confirm callers)\r
+inline int  iscaml(int c) {return isalnum(c) || c == '_';} /* identifier char: alphanumeric or '_' */\r
+inline int iscamlf(int c) {return isalpha(c) || c == '_';} /* identifier-start char: alphabetic or '_' */\r
+inline int iscamld(int c) {return isdigit(c) || c == '_';} /* numeric-literal char: decimal digit or '_' */\r
+\r
+static const int baseT[24] = { /* numeric base selected by a base letter, indexed by (lower-case letter - 'a'); 0 = not a base letter */\r
+       0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     /* A - L: 'b' => base 2 */\r
+       0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16      /* M - X: 'o' => base 8, 'x' => base 16 */\r
+};\r
+\r
+#ifdef SCI_NAMESPACE\r
+using namespace Scintilla;\r
+#endif\r
+\r
+#ifdef BUILD_AS_EXTERNAL_LEXER\r
+/*\r
+       (actually seems to work!)\r
+*/\r
+#include "WindowAccessor.h"\r
+#include "ExternalLexer.h"\r
+\r
+#if PLAT_WIN\r
+#include <windows.h>\r
+#endif\r
+\r
+static void ColouriseCamlDoc( /* forward declaration: the lexer is defined after InternalLexOrFold, which calls it */\r
+       unsigned int startPos, int length,\r
+       int initStyle,\r
+       WordList *keywordlists[],\r
+       Accessor &styler);\r
+\r
+static void FoldCamlDoc( /* forward declaration: the folder is defined after InternalLexOrFold, which calls it */\r
+       unsigned int startPos, int length,\r
+       int initStyle,\r
+       WordList *keywordlists[],\r
+       Accessor &styler);\r
+\r
+static void InternalLexOrFold(int lexOrFold, unsigned int startPos, int length, /* NOTE: despite the parameter name, nonzero selects folding and zero selects lexing (see the definition and the Fold/Lex callers) */\r
+       int initStyle, char *words[], WindowID window, char *props);\r
+\r
+static const char* LexerName = "caml"; /* name copied out by GetLexerName */\r
+\r
+#ifdef TRACE\r
+void Platform::DebugPrintf(const char *format, ...) { /* TRACE build: format the arguments and hand the text to Platform::DebugDisplay */\r
+       char buffer[2000];\r
+       va_list pArguments;\r
+       va_start(pArguments, format);\r
+       vsprintf(buffer,format,pArguments); /* NOTE(review): unbounded write - output longer than buffer[2000] overflows it; a bounded formatter (e.g. vsnprintf) would avoid this */\r
+       va_end(pArguments);\r
+       Platform::DebugDisplay(buffer);\r
+}\r
+#else\r
+void Platform::DebugPrintf(const char *, ...) { /* non-TRACE build: intentionally does nothing */\r
+}\r
+#endif\r
+\r
+bool Platform::IsDBCSLeadByte(int codePage, char ch) { /* forwards to ::IsDBCSLeadByteEx and converts its result to bool */\r
+       return ::IsDBCSLeadByteEx(codePage, ch) != 0;\r
+}\r
+\r
+long Platform::SendScintilla(WindowID w, unsigned int msg, unsigned long wParam, long lParam) { /* sends msg to w (reinterpreted as an HWND) via ::SendMessage and returns its result */\r
+       return ::SendMessage(reinterpret_cast<HWND>(w), msg, wParam, lParam);\r
+}\r
+\r
+long Platform::SendScintillaPointer(WindowID w, unsigned int msg, unsigned long wParam, void *lParam) { /* same as SendScintilla, but lParam is a pointer that is reinterpreted as an LPARAM */\r
+       return ::SendMessage(reinterpret_cast<HWND>(w), msg, wParam,\r
+               reinterpret_cast<LPARAM>(lParam));\r
+}\r
+\r
+void EXT_LEXER_DECL Fold(unsigned int lexer, unsigned int startPos, int length, /* external-lexer fold entry point; the lexer argument is ignored */\r
+       int initStyle, char *words[], WindowID window, char *props)\r
+{\r
+       // below useless evaluation(s) to suppress "not used" warnings\r
+       lexer;\r
+       // build expected data structures and do the Fold (the leading 1 makes InternalLexOrFold call FoldCamlDoc)\r
+       InternalLexOrFold(1, startPos, length, initStyle, words, window, props);\r
+\r
+}\r
+\r
+int EXT_LEXER_DECL GetLexerCount() /* always reports exactly one lexer */\r
+{\r
+       return 1;       // just us [Objective] Caml lexers here!\r
+}\r
+\r
+void EXT_LEXER_DECL GetLexerName(unsigned int Index, char *name, int buflength) /* copies LexerName into name, truncated to fit buflength bytes; writes nothing when buflength <= 0 */\r
+{\r
+       // below useless evaluation(s) to suppress "not used" warnings\r
+       Index;\r
+       // return as much of our lexer name as will fit, always NUL-terminated (Index is ignored; GetLexerCount reports a single lexer)\r
+       if (buflength > 0) {\r
+               buflength--; /* reserve one byte for the terminating NUL */\r
+               int n = strlen(LexerName);\r
+               if (n > buflength)\r
+                       n = buflength;\r
+               memcpy(name, LexerName, n), name[n] = '\0';\r
+       }\r
+}\r
+\r
+void EXT_LEXER_DECL Lex(unsigned int lexer, unsigned int startPos, int length, /* external-lexer colouring entry point; the lexer argument is ignored */\r
+       int initStyle, char *words[], WindowID window, char *props)\r
+{\r
+       // below useless evaluation(s) to suppress "not used" warnings\r
+       lexer;\r
+       // build expected data structures and do the Lex (the leading 0 makes InternalLexOrFold call ColouriseCamlDoc)\r
+       InternalLexOrFold(0, startPos, length, initStyle, words, window, props);\r
+}\r
+\r
+static void InternalLexOrFold(int foldOrLex, unsigned int startPos, int length, /* shared body of Fold and Lex: builds the accessor and WordLists, then runs the folder (foldOrLex nonzero) or the lexer (zero) */\r
+       int initStyle, char *words[], WindowID window, char *props)\r
+{\r
+       // create and initialize a WindowAccessor (including contained PropSet)\r
+       PropSet ps;\r
+       ps.SetMultiple(props);\r
+       WindowAccessor wa(window, ps);\r
+       // create and initialize WordList(s)\r
+       int nWL = 0;\r
+       for (; words[nWL]; nWL++) ;     // count # of WordList PTRs needed (words must end with a null entry)\r
+       WordList** wl = new WordList* [nWL + 1];// alloc WordList PTRs (one extra slot for the null terminator)\r
+       int i = 0;\r
+       for (; i < nWL; i++) {\r
+               wl[i] = new WordList(); // (works or THROWS bad_alloc EXCEPTION; NOTE(review): nothing here would free wl or earlier entries if that happened)\r
+               wl[i]->Set(words[i]);\r
+       }\r
+       wl[i] = 0;\r
+       // call our "internal" folder/lexer (... then do Flush!)\r
+       if (foldOrLex)\r
+               FoldCamlDoc(startPos, length, initStyle, wl, wa);\r
+       else\r
+               ColouriseCamlDoc(startPos, length, initStyle, wl, wa); /* NOTE(review): ColouriseCamlDoc dereferences keywordlists[0..2]; with fewer than three words entries that hits the null terminator or reads past wl - confirm callers */\r
+       wa.Flush();\r
+       // clean up before leaving\r
+       for (i = nWL - 1; i >= 0; i--)\r
+               delete wl[i];\r
+       delete [] wl;\r
+}\r
+\r
+static\r
+#endif /* BUILD_AS_EXTERNAL_LEXER */\r
+\r
+void ColouriseCamlDoc( /* char-by-char state machine that assigns SCE_CAML_* styles over the range given to the StyleContext */\r
+       unsigned int startPos, int length,\r
+       int initStyle,\r
+       WordList *keywordlists[],\r
+       Accessor &styler)\r
+{\r
+       // initialize styler\r
+       StyleContext sc(startPos, length, initStyle, styler);\r
+       // set up [initial] state info (terminating states that shouldn't "bleed")\r
+       int nesting = 0;\r
+       if (sc.state < SCE_CAML_STRING)\r
+               sc.state = SCE_CAML_DEFAULT;\r
+       if (sc.state >= SCE_CAML_COMMENT)\r
+               nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT;\r
+\r
+       int chBase = 0, chToken = 0, chLit = 0; /* numeric base of the current number, start position of the current token, flag set once a char literal contains a backslash */\r
+       WordList& keywords  = *keywordlists[0]; /* NOTE(review): keywordlists[0..2] are dereferenced unchecked - assumes the caller supplies at least three lists */\r
+       WordList& keywords2 = *keywordlists[1];\r
+       WordList& keywords3 = *keywordlists[2];\r
+       const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0); /* nonzero enables the read-only ("magic") comment check in the comment states below */\r
+\r
+       // foreach char in range...\r
+       while (sc.More()) {\r
+               // set up [per-char] state info\r
+               int state2 = -1;                // (ASSUME no state change)\r
+               int chColor = sc.currentPos - 1;// (ASSUME standard coloring range)\r
+               bool advance = true;    // (ASSUME scanner "eats" 1 char)\r
+\r
+               // step state machine\r
+               switch (sc.state & 0x0f) {\r
+               case SCE_CAML_DEFAULT:\r
+                       chToken = sc.currentPos;        // save [possible] token start (JIC)\r
+                       // it's wide open; what do we have?\r
+                       if (iscamlf(sc.ch))\r
+                               state2 = SCE_CAML_IDENTIFIER;\r
+                       else if (sc.Match('`') && iscamlf(sc.chNext))\r
+                               state2 = SCE_CAML_TAGNAME;\r
+                       else if (sc.Match('#') && isdigit(sc.chNext))\r
+                               state2 = SCE_CAML_LINENUM;\r
+                       else if (isdigit(sc.ch)) {\r
+                               state2 = SCE_CAML_NUMBER, chBase = 10;\r
+                               if (sc.Match('0') && strchr("bBoOxX", sc.chNext)) /* NOTE(review): strchr also matches a NUL chNext, which would index baseT out of range below - confirm chNext can never be 0 here */\r
+                                       chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward();\r
+                       } else if (sc.Match('\''))      /* (char literal?) */\r
+                               state2 = SCE_CAML_CHAR, chLit = 0;\r
+                       else if (sc.Match('\"'))\r
+                               state2 = SCE_CAML_STRING;\r
+                       else if (sc.Match('(', '*'))\r
+                               state2 = SCE_CAML_COMMENT,\r
+                                       sc.ch = ' ',    // (make SURE "(*)" isn't seen as a closed comment)\r
+                                       sc.Forward();\r
+                       else if (strchr("!?~"           /* Caml "prefix-symbol" */\r
+                                       "=<>@^|&+-*/$%"         /* Caml "infix-symbol" */\r
+                                       "()[]{};,:.#", sc.ch))  /* Caml "bracket" or ;,:.# */\r
+                               state2 = SCE_CAML_OPERATOR;\r
+                       break;\r
+\r
+               case SCE_CAML_IDENTIFIER:\r
+                       // [try to] interpret as [additional] identifier char\r
+                       if (!(iscaml(sc.ch) || sc.Match('\''))) {\r
+                               const int n = sc.currentPos - chToken;\r
+                               if (n < 24) {\r
+                                       // length is believable as keyword, [re-]construct token\r
+                                       char t[24];\r
+                                       for (int i = -n; i < 0; i++)\r
+                                               t[n + i] = static_cast<char>(sc.GetRelative(i));\r
+                                       t[n] = '\0';\r
+                                       // special-case "_" token as KEYWORD\r
+                                       if ((n == 1 && sc.chPrev == '_') || keywords.InList(t))\r
+                                               sc.ChangeState(SCE_CAML_KEYWORD);\r
+                                       else if (keywords2.InList(t))\r
+                                               sc.ChangeState(SCE_CAML_KEYWORD2);\r
+                                       else if (keywords3.InList(t))\r
+                                               sc.ChangeState(SCE_CAML_KEYWORD3);\r
+                               }\r
+                               state2 = SCE_CAML_DEFAULT, advance = false;\r
+                       }\r
+                       break;\r
+\r
+               case SCE_CAML_TAGNAME:\r
+                       // [try to] interpret as [additional] tagname char\r
+                       if (!(iscaml(sc.ch) || sc.Match('\'')))\r
+                               state2 = SCE_CAML_DEFAULT, advance = false;\r
+                       break;\r
+\r
+               /*case SCE_CAML_KEYWORD:\r
+               case SCE_CAML_KEYWORD2:\r
+               case SCE_CAML_KEYWORD3:\r
+                       // [try to] interpret as [additional] keyword char\r
+                       if (!iscaml(ch))\r
+                               state2 = SCE_CAML_DEFAULT, advance = false;\r
+                       break;*/\r
+\r
+               case SCE_CAML_LINENUM:\r
+                       // [try to] interpret as [additional] linenum directive char\r
+                       if (!isdigit(sc.ch))\r
+                               state2 = SCE_CAML_DEFAULT, advance = false;\r
+                       break;\r
+\r
+               case SCE_CAML_OPERATOR: {\r
+                       // [try to] interpret as [additional] operator char\r
+                       const char* o = 0;\r
+                       if (iscaml(sc.ch) || isspace(sc.ch)                /* ident or whitespace */\r
+                               || (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */\r
+                               || !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) {\r
+                               // check for INCLUSIVE termination\r
+                               if (o && strchr(")]};,", sc.ch)) {\r
+                                       if ((sc.Match(')') && sc.chPrev == '(')\r
+                                               || (sc.Match(']') && sc.chPrev == '['))\r
+                                               // special-case "()" and "[]" tokens as KEYWORDS\r
+                                               sc.ChangeState(SCE_CAML_KEYWORD);\r
+                                       chColor++;\r
+                               } else\r
+                                       advance = false;\r
+                               state2 = SCE_CAML_DEFAULT;\r
+                       }\r
+                       break;\r
+               }\r
+\r
+               case SCE_CAML_NUMBER:\r
+                       // [try to] interpret as [additional] numeric literal char\r
+                       // N.B. - improperly accepts "extra" digits in base 2 or 8 literals\r
+                       if (iscamld(sc.ch) || IsADigit(sc.ch, chBase))\r
+                               break;\r
+                       // how about an integer suffix?\r
+                       if ((sc.Match('l') || sc.Match('L') || sc.Match('n'))\r
+                               && (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase)))\r
+                               break;\r
+                       // or a floating-point literal?\r
+                       if (chBase == 10) {\r
+                               // with a decimal point?\r
+                               if (sc.Match('.') && iscamld(sc.chPrev))\r
+                                       break;\r
+                               // with an exponent? (I)\r
+                               if ((sc.Match('e') || sc.Match('E'))\r
+                                       && (iscamld(sc.chPrev) || sc.chPrev == '.'))\r
+                                       break;\r
+                               // with an exponent? (II)\r
+                               if ((sc.Match('+') || sc.Match('-'))\r
+                                       && (sc.chPrev == 'e' || sc.chPrev == 'E'))\r
+                                       break;\r
+                       }\r
+                       // it looks like we have run out of number\r
+                       state2 = SCE_CAML_DEFAULT, advance = false;\r
+                       break;\r
+\r
+               case SCE_CAML_CHAR:\r
+                       // [try to] interpret as [additional] char literal char\r
+                       if (sc.Match('\\')) {\r
+                               chLit = 1;      // (definitely IS a char literal)\r
+                               if (sc.chPrev == '\\')\r
+                                       sc.ch = ' ';    // (so termination test isn't fooled)\r
+                       // should we be terminating - one way or another?\r
+                       } else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) {\r
+                               state2 = SCE_CAML_DEFAULT;\r
+                               if (sc.Match('\''))\r
+                                       chColor++;\r
+                               else\r
+                                       sc.ChangeState(SCE_CAML_IDENTIFIER);\r
+                       // ... maybe a char literal, maybe not\r
+                       } else if (chLit < 1 && sc.currentPos - chToken >= 2)\r
+                               sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false;\r
+                       break;\r
+\r
+               case SCE_CAML_STRING:\r
+                       // [try to] interpret as [additional] string literal char\r
+                       if (sc.Match('\\') && sc.chPrev == '\\')\r
+                               sc.ch = ' ';    // (so '\\' doesn't cause us trouble)\r
+                       else if (sc.Match('\"') && sc.chPrev != '\\')\r
+                               state2 = SCE_CAML_DEFAULT, chColor++;\r
+                       break;\r
+\r
+               case SCE_CAML_COMMENT:\r
+               case SCE_CAML_COMMENT1:\r
+               case SCE_CAML_COMMENT2:\r
+               case SCE_CAML_COMMENT3:\r
+                       // we're IN a comment - does this start a NESTED comment?\r
+                       if (sc.Match('(', '*'))\r
+                               state2 = sc.state + 1, chToken = sc.currentPos,\r
+                                       sc.ch = ' ',    // (make SURE "(*)" isn't seen as a closed comment)\r
+                                       sc.Forward(), nesting++;\r
+                       // [try to] interpret as [additional] comment char\r
+                       else if (sc.Match(')') && sc.chPrev == '*') {\r
+                               if (nesting)\r
+                                       state2 = (sc.state & 0x0f) - 1, chToken = 0, nesting--;\r
+                               else\r
+                                       state2 = SCE_CAML_DEFAULT;\r
+                               chColor++;\r
+                       // enable "magic" (read-only) comment AS REQUIRED\r
+                       } else if (useMagic && sc.currentPos - chToken == 4\r
+                               && sc.Match('c') && sc.chPrev == 'r' && sc.GetRelative(-2) == '@')\r
+                               sc.state |= 0x10;       // (switch to read-only comment style)\r
+                       break;\r
+               }\r
+\r
+               // handle state change and char coloring as required\r
+               if (state2 >= 0)\r
+                       styler.ColourTo(chColor, sc.state), sc.ChangeState(state2);\r
+               // move to next char UNLESS re-scanning current char\r
+               if (advance)\r
+                       sc.Forward();\r
+       }\r
+\r
+       // do any required terminal char coloring (JIC)\r
+       sc.Complete();\r
+}\r
+\r
+#ifdef BUILD_AS_EXTERNAL_LEXER\r
+static\r
+#endif /* BUILD_AS_EXTERNAL_LEXER */\r
+void FoldCamlDoc( /* folding stub: takes the standard folder arguments but sets no fold information */\r
+       unsigned int startPos, int length,\r
+       int initStyle,\r
+       WordList *keywordlists[],\r
+       Accessor &styler)\r
+{\r
+       // below useless evaluation(s) to suppress "not used" warnings\r
+       startPos || length || initStyle || keywordlists[0] || styler.Length();\r
+}\r
+\r
+static const char * const camlWordListDesc[] = { /* null-terminated descriptions of the keyword lists, in the order ColouriseCamlDoc reads keywordlists[0..2] */\r
+       "Keywords",             // primary Objective Caml keywords\r
+       "Keywords2",    // "optional" keywords (typically from Pervasives)\r
+       "Keywords3",    // "optional" keywords (typically typenames)\r
+       0\r
+};\r
+\r
+#ifndef BUILD_AS_EXTERNAL_LEXER\r
+LexerModule lmCaml(SCLEX_CAML, ColouriseCamlDoc, "caml", FoldCamlDoc, camlWordListDesc); /* built-in (non-external) build only: module object tying SCLEX_CAML and the name "caml" to the lexer, folder and word-list descriptions */\r
+#endif /* BUILD_AS_EXTERNAL_LEXER */\r