--- /dev/null
+// Scintilla source code edit control\r
+/** @file LexCaml.cxx\r
+ ** Lexer for Objective Caml.\r
+ **/\r
+// Copyright 2005 by Robert Roessler <robertr@rftp.com>\r
+// The License.txt file describes the conditions under which this software may be distributed.\r
+/* Release History\r
+ 20050204 Initial release.\r
+ 20050205 Quick compiler standards/"cleanliness" adjustment.\r
+ 20050206 Added cast for IsLeadByte().\r
+ 20050209 Changes to "external" build support.\r
+ 20050306 Fix for 1st-char-in-doc "corner" case.\r
+ 20050502 Fix for [harmless] one-past-the-end coloring.\r
+ 20050515 Refined numeric token recognition logic.\r
+ 20051125 Added 2nd "optional" keywords class.\r
+ 20051129 Support "magic" (read-only) comments for RCaml.\r
+ 20051204 Switch to using StyleContext infrastructure.\r
+*/\r
+\r
+#include <stdlib.h>\r
+#include <string.h>\r
+#include <ctype.h>\r
+#include <stdio.h>\r
+#include <stdarg.h>\r
+\r
+#include "Platform.h"\r
+\r
+#include "PropSet.h"\r
+#include "Accessor.h"\r
+#include "StyleContext.h"\r
+#include "KeyWords.h"\r
+#include "Scintilla.h"\r
+#include "SciLexer.h"\r
+\r
+// Since the Microsoft __iscsym[f] funcs are not ANSI...\r
+// Character-class predicates for Caml tokens; each returns non-zero on a match.\r
+// The <ctype.h> classifiers are only defined for EOF and values representable\r
+// as unsigned char, so anything outside 0..0xFF is rejected before the call.\r
+//   iscaml  - letter, digit or '_'  (identifier continuation)\r
+//   iscamlf - letter or '_'         (identifier start)\r
+//   iscamld - digit or '_'          (numeric literal body)\r
+inline int iscaml(int c) {return c == '_' || (c >= 0 && c <= 0xFF && isalnum(c));}\r
+inline int iscamlf(int c) {return c == '_' || (c >= 0 && c <= 0xFF && isalpha(c));}\r
+inline int iscamld(int c) {return c == '_' || (c >= 0 && c <= 0xFF && isdigit(c));}\r
+\r
+// Numeric base selected by the letter that follows a leading '0' in a literal.\r
+// Indexed by (lower-cased letter - 'a'): 'b' -> 2, 'o' -> 8, 'x' -> 16.\r
+// Every other slot holds 0; the lexer only looks up 'b', 'o' and 'x'.\r
+static const int baseT[24] = {\r
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */\r
+ 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */\r
+};\r
+\r
+#ifdef SCI_NAMESPACE\r
+using namespace Scintilla;\r
+#endif\r
+\r
+#ifdef BUILD_AS_EXTERNAL_LEXER\r
+/*\r
+ (actually seems to work!)\r
+*/\r
+#include "WindowAccessor.h"\r
+#include "ExternalLexer.h"\r
+\r
+#if PLAT_WIN\r
+#include <windows.h>\r
+#endif\r
+\r
+// Forward declarations: the exported entry points that follow are defined ahead\r
+// of the lexer and folder bodies they dispatch to.\r
+static void ColouriseCamlDoc(\r
+    unsigned int startPos, int length,\r
+    int initStyle,\r
+    WordList *keywordlists[],\r
+    Accessor &styler);\r
+\r
+static void FoldCamlDoc(\r
+    unsigned int startPos, int length,\r
+    int initStyle,\r
+    WordList *keywordlists[],\r
+    Accessor &styler);\r
+\r
+// Shared worker behind Lex() and Fold(): a non-zero foldOrLex selects folding,\r
+// zero selects lexing (the parameter name matches the definition).\r
+static void InternalLexOrFold(int foldOrLex, unsigned int startPos, int length,\r
+    int initStyle, char *words[], WindowID window, char *props);\r
+\r
+// Name copied out to the host by GetLexerName(); never re-pointed.\r
+static const char* const LexerName = "caml";\r
+\r
+#ifdef TRACE\r
+// Formats a printf-style message into a local buffer and passes it to\r
+// Platform::DebugDisplay().  Output that does not fit is truncated.\r
+void Platform::DebugPrintf(const char *format, ...) {\r
+    char buffer[2000];\r
+    va_list pArguments;\r
+    va_start(pArguments, format);\r
+    // bounded formatting: an unbounded vsprintf could write past the buffer\r
+    vsnprintf(buffer, sizeof(buffer), format, pArguments);\r
+    va_end(pArguments);\r
+    // keep the text terminated even if the formatter reported an error\r
+    buffer[sizeof(buffer) - 1] = '\0';\r
+    Platform::DebugDisplay(buffer);\r
+}\r
+#else\r
+// Tracing disabled: the call compiles to a no-op.\r
+void Platform::DebugPrintf(const char *, ...) {\r
+}\r
+#endif\r
+\r
+// Asks ::IsDBCSLeadByteEx whether ch is a lead byte in the given code page and\r
+// converts its result to bool.\r
+bool Platform::IsDBCSLeadByte(int codePage, char ch) {\r
+    if (::IsDBCSLeadByteEx(codePage, ch))\r
+        return true;\r
+    return false;\r
+}\r
+\r
+// Forwards a message with integer parameters to the window behind w via\r
+// ::SendMessage and returns its result.\r
+long Platform::SendScintilla(WindowID w, unsigned int msg, unsigned long wParam, long lParam) {\r
+    HWND target = reinterpret_cast<HWND>(w);\r
+    return ::SendMessage(target, msg, wParam, lParam);\r
+}\r
+\r
+// Same as SendScintilla(), but the last argument is a pointer that is carried\r
+// in the message's LPARAM.\r
+long Platform::SendScintillaPointer(WindowID w, unsigned int msg, unsigned long wParam, void *lParam) {\r
+    HWND target = reinterpret_cast<HWND>(w);\r
+    LPARAM payload = reinterpret_cast<LPARAM>(lParam);\r
+    return ::SendMessage(target, msg, wParam, payload);\r
+}\r
+\r
+// Exported folding entry point of the external lexer.  The lexer index is not\r
+// consulted; everything else is passed straight to the shared worker.\r
+void EXT_LEXER_DECL Fold(unsigned int lexer, unsigned int startPos, int length,\r
+    int initStyle, char *words[], WindowID window, char *props)\r
+{\r
+    static_cast<void>(lexer);   // (only here to suppress the "not used" warning)\r
+    // a non-zero selector makes the worker fold rather than lex\r
+    const int wantFold = 1;\r
+    InternalLexOrFold(wantFold, startPos, length, initStyle, words, window, props);\r
+}\r
+\r
+// Reports how many lexers this external module provides.\r
+int EXT_LEXER_DECL GetLexerCount()\r
+{\r
+    const int lexersInModule = 1;   // just the [Objective] Caml lexer\r
+    return lexersInModule;\r
+}\r
+\r
+// Copies the lexer name into name[], truncated to fit buflength bytes including\r
+// the terminating NUL.  Nothing is written when buflength is not positive.\r
+// Index is ignored.\r
+void EXT_LEXER_DECL GetLexerName(unsigned int Index, char *name, int buflength)\r
+{\r
+    static_cast<void>(Index);   // (only here to suppress the "not used" warning)\r
+    if (buflength <= 0)\r
+        return;\r
+    // one byte of the caller's buffer is reserved for the terminator\r
+    const int room = buflength - 1;\r
+    const int full = strlen(LexerName);\r
+    const int count = (full > room) ? room : full;\r
+    memcpy(name, LexerName, count);\r
+    name[count] = '\0';\r
+}\r
+\r
+// Exported lexing entry point of the external lexer.  The lexer index is not\r
+// consulted; everything else is passed straight to the shared worker.\r
+void EXT_LEXER_DECL Lex(unsigned int lexer, unsigned int startPos, int length,\r
+    int initStyle, char *words[], WindowID window, char *props)\r
+{\r
+    static_cast<void>(lexer);   // (only here to suppress the "not used" warning)\r
+    // a zero selector makes the worker lex rather than fold\r
+    const int wantFold = 0;\r
+    InternalLexOrFold(wantFold, startPos, length, initStyle, words, window, props);\r
+}\r
+\r
+// Adapts the external-lexer calling convention to the internal lexer/folder.\r
+//\r
+//   foldOrLex        - non-zero: run FoldCamlDoc; zero: run ColouriseCamlDoc\r
+//   startPos, length,\r
+//   initStyle        - passed through unchanged\r
+//   words            - null-terminated array of keyword-list strings\r
+//   window           - window the WindowAccessor works on\r
+//   props            - property text loaded into the PropSet\r
+//\r
+// The WordList objects are owned by a local holder, so they are released even\r
+// if an allocation or the lexer/folder throws part-way through.\r
+static void InternalLexOrFold(int foldOrLex, unsigned int startPos, int length,\r
+    int initStyle, char *words[], WindowID window, char *props)\r
+{\r
+    // Owner of the null-terminated WordList* array handed to the lexer/folder.\r
+    // Used for exactly one local instance and never copied.\r
+    struct WordListArray {\r
+        WordList **list;    // capacity + 1 slots; list[used] is always 0\r
+        int used;           // number of WordLists currently owned\r
+        explicit WordListArray(int capacity)\r
+            : list(new WordList*[capacity + 1]), used(0) {\r
+            list[0] = 0;\r
+        }\r
+        ~WordListArray() {\r
+            // release last-to-first, then the pointer array itself\r
+            while (used > 0)\r
+                delete list[--used];\r
+            delete [] list;\r
+        }\r
+        // Caller must not append more than "capacity" entries.\r
+        void Append(const char *text) {\r
+            WordList *fresh = new WordList();\r
+            list[used++] = fresh;   // owned from here on, even if Set() throws\r
+            list[used] = 0;\r
+            fresh->Set(text);\r
+        }\r
+    };\r
+\r
+    // create and initialize a WindowAccessor (including contained PropSet)\r
+    PropSet ps;\r
+    ps.SetMultiple(props);\r
+    WindowAccessor wa(window, ps);\r
+    // count the keyword-list strings, then build one WordList per string\r
+    int nWL = 0;\r
+    while (words[nWL])\r
+        nWL++;\r
+    WordListArray wl(nWL);\r
+    for (int i = 0; i < nWL; i++)\r
+        wl.Append(words[i]);\r
+    // call our "internal" folder/lexer (... then do Flush!)\r
+    if (foldOrLex)\r
+        FoldCamlDoc(startPos, length, initStyle, wl.list, wa);\r
+    else\r
+        ColouriseCamlDoc(startPos, length, initStyle, wl.list, wa);\r
+    wa.Flush();\r
+    // wl is destroyed here, before wa and ps\r
+}\r
+\r
+static\r
+#endif /* BUILD_AS_EXTERNAL_LEXER */\r
+\r
+// Colourise (lex) a range of an Objective Caml document.\r
+//\r
+//   startPos, length - range to style; handed to the StyleContext\r
+//   initStyle        - style in effect at startPos; handed to the StyleContext\r
+//   keywordlists     - [0] primary keywords, [1] and [2] "optional" keyword classes\r
+//   styler           - document accessor; also supplies "lexer.caml.magic"\r
+//\r
+// The scanner is a per-character state machine.  A case requests a state change\r
+// by setting state2; the text up to chColor is then coloured in the outgoing\r
+// state.  A case that clears "advance" has the current char re-examined in the\r
+// new state.  Comment nesting is tracked by stepping the comment state value up\r
+// and down by one, and bit 0x10 on top of a comment state selects the "magic"\r
+// (read-only) comment style.\r
+//\r
+// NOTE(review): apart from the radix lookup, the strchr()/isdigit()/isspace()\r
+// calls below still receive sc.ch / sc.chNext unfiltered - confirm that the\r
+// StyleContext cannot deliver NUL or values above 0xFF there.\r
+void ColouriseCamlDoc(\r
+    unsigned int startPos, int length,\r
+    int initStyle,\r
+    WordList *keywordlists[],\r
+    Accessor &styler)\r
+{\r
+    // initialize styler\r
+    StyleContext sc(startPos, length, initStyle, styler);\r
+    // set up [initial] state info (terminating states that shouldn't "bleed")\r
+    int nesting = 0;\r
+    if (sc.state < SCE_CAML_STRING)\r
+        sc.state = SCE_CAML_DEFAULT;\r
+    // resuming inside a comment: recover the nesting depth from the state value\r
+    if (sc.state >= SCE_CAML_COMMENT)\r
+        nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT;\r
+\r
+    // chBase: radix of the number being scanned; chToken: start position of the\r
+    // current token; chLit: set once a '...' token is known to be a char literal\r
+    int chBase = 0, chToken = 0, chLit = 0;\r
+    WordList& keywords = *keywordlists[0];\r
+    WordList& keywords2 = *keywordlists[1];\r
+    WordList& keywords3 = *keywordlists[2];\r
+    const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0);\r
+\r
+    // foreach char in range...\r
+    while (sc.More()) {\r
+        // set up [per-char] state info\r
+        int state2 = -1;                // (ASSUME no state change)\r
+        int chColor = sc.currentPos - 1;// (ASSUME standard coloring range)\r
+        bool advance = true;            // (ASSUME scanner "eats" 1 char)\r
+\r
+        // step state machine\r
+        switch (sc.state & 0x0f) {\r
+        case SCE_CAML_DEFAULT:\r
+            chToken = sc.currentPos;    // save [possible] token start (JIC)\r
+            // it's wide open; what do we have?\r
+            if (iscamlf(sc.ch))\r
+                state2 = SCE_CAML_IDENTIFIER;\r
+            else if (sc.Match('`') && iscamlf(sc.chNext))\r
+                state2 = SCE_CAML_TAGNAME;\r
+            else if (sc.Match('#') && isdigit(sc.chNext))\r
+                state2 = SCE_CAML_LINENUM;\r
+            else if (isdigit(sc.ch)) {\r
+                state2 = SCE_CAML_NUMBER, chBase = 10;\r
+                // Only consult baseT for a plain ASCII radix letter: strchr()\r
+                // also matches the terminating NUL (and converts its argument\r
+                // to char), which would otherwise index outside baseT.\r
+                if (sc.Match('0') && sc.chNext > 0 && sc.chNext < 0x80\r
+                    && strchr("bBoOxX", sc.chNext))\r
+                    chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward();\r
+            } else if (sc.Match('\'')) /* (char literal?) */\r
+                state2 = SCE_CAML_CHAR, chLit = 0;\r
+            else if (sc.Match('\"'))\r
+                state2 = SCE_CAML_STRING;\r
+            else if (sc.Match('(', '*'))\r
+                state2 = SCE_CAML_COMMENT,\r
+                    sc.ch = ' ',    // (make SURE "(*)" isn't seen as a closed comment)\r
+                    sc.Forward();\r
+            else if (strchr("!?~"           /* Caml "prefix-symbol" */\r
+                    "=<>@^|&+-*/$%"         /* Caml "infix-symbol" */\r
+                    "()[]{};,:.#", sc.ch))  /* Caml "bracket" or ;,:.# */\r
+                state2 = SCE_CAML_OPERATOR;\r
+            break;\r
+\r
+        case SCE_CAML_IDENTIFIER:\r
+            // [try to] interpret as [additional] identifier char\r
+            if (!(iscaml(sc.ch) || sc.Match('\''))) {\r
+                const int n = sc.currentPos - chToken;\r
+                if (n < 24) {\r
+                    // length is believable as keyword, [re-]construct token\r
+                    char t[24];\r
+                    for (int i = -n; i < 0; i++)\r
+                        t[n + i] = static_cast<char>(sc.GetRelative(i));\r
+                    t[n] = '\0';\r
+                    // special-case "_" token as KEYWORD\r
+                    if ((n == 1 && sc.chPrev == '_') || keywords.InList(t))\r
+                        sc.ChangeState(SCE_CAML_KEYWORD);\r
+                    else if (keywords2.InList(t))\r
+                        sc.ChangeState(SCE_CAML_KEYWORD2);\r
+                    else if (keywords3.InList(t))\r
+                        sc.ChangeState(SCE_CAML_KEYWORD3);\r
+                }\r
+                state2 = SCE_CAML_DEFAULT, advance = false;\r
+            }\r
+            break;\r
+\r
+        case SCE_CAML_TAGNAME:\r
+            // [try to] interpret as [additional] tagname char\r
+            if (!(iscaml(sc.ch) || sc.Match('\'')))\r
+                state2 = SCE_CAML_DEFAULT, advance = false;\r
+            break;\r
+\r
+        /*case SCE_CAML_KEYWORD:\r
+        case SCE_CAML_KEYWORD2:\r
+        case SCE_CAML_KEYWORD3:\r
+            // [try to] interpret as [additional] keyword char\r
+            if (!iscaml(ch))\r
+                state2 = SCE_CAML_DEFAULT, advance = false;\r
+            break;*/\r
+\r
+        case SCE_CAML_LINENUM:\r
+            // [try to] interpret as [additional] linenum directive char\r
+            if (!isdigit(sc.ch))\r
+                state2 = SCE_CAML_DEFAULT, advance = false;\r
+            break;\r
+\r
+        case SCE_CAML_OPERATOR: {\r
+            // [try to] interpret as [additional] operator char\r
+            const char* o = 0;\r
+            if (iscaml(sc.ch) || isspace(sc.ch) /* ident or whitespace */\r
+                || (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */\r
+                || !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) {\r
+                // check for INCLUSIVE termination\r
+                if (o && strchr(")]};,", sc.ch)) {\r
+                    if ((sc.Match(')') && sc.chPrev == '(')\r
+                        || (sc.Match(']') && sc.chPrev == '['))\r
+                        // special-case "()" and "[]" tokens as KEYWORDS\r
+                        sc.ChangeState(SCE_CAML_KEYWORD);\r
+                    chColor++;\r
+                } else\r
+                    advance = false;\r
+                state2 = SCE_CAML_DEFAULT;\r
+            }\r
+            break;\r
+        }\r
+\r
+        case SCE_CAML_NUMBER:\r
+            // [try to] interpret as [additional] numeric literal char\r
+            // N.B. - improperly accepts "extra" digits in base 2 or 8 literals\r
+            if (iscamld(sc.ch) || IsADigit(sc.ch, chBase))\r
+                break;\r
+            // how about an integer suffix?\r
+            if ((sc.Match('l') || sc.Match('L') || sc.Match('n'))\r
+                && (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase)))\r
+                break;\r
+            // or a floating-point literal?\r
+            if (chBase == 10) {\r
+                // with a decimal point?\r
+                if (sc.Match('.') && iscamld(sc.chPrev))\r
+                    break;\r
+                // with an exponent? (I)\r
+                if ((sc.Match('e') || sc.Match('E'))\r
+                    && (iscamld(sc.chPrev) || sc.chPrev == '.'))\r
+                    break;\r
+                // with an exponent? (II)\r
+                if ((sc.Match('+') || sc.Match('-'))\r
+                    && (sc.chPrev == 'e' || sc.chPrev == 'E'))\r
+                    break;\r
+            }\r
+            // it looks like we have run out of number\r
+            state2 = SCE_CAML_DEFAULT, advance = false;\r
+            break;\r
+\r
+        case SCE_CAML_CHAR:\r
+            // [try to] interpret as [additional] char literal char\r
+            if (sc.Match('\\')) {\r
+                chLit = 1;  // (definitely IS a char literal)\r
+                if (sc.chPrev == '\\')\r
+                    sc.ch = ' ';    // (so termination test isn't fooled)\r
+            // should we be terminating - one way or another?\r
+            } else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) {\r
+                state2 = SCE_CAML_DEFAULT;\r
+                if (sc.Match('\''))\r
+                    chColor++;\r
+                else\r
+                    sc.ChangeState(SCE_CAML_IDENTIFIER);\r
+            // ... maybe a char literal, maybe not\r
+            } else if (chLit < 1 && sc.currentPos - chToken >= 2)\r
+                sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false;\r
+            break;\r
+\r
+        case SCE_CAML_STRING:\r
+            // [try to] interpret as [additional] string literal char\r
+            if (sc.Match('\\') && sc.chPrev == '\\')\r
+                sc.ch = ' ';    // (so '\\' doesn't cause us trouble)\r
+            else if (sc.Match('\"') && sc.chPrev != '\\')\r
+                state2 = SCE_CAML_DEFAULT, chColor++;\r
+            break;\r
+\r
+        case SCE_CAML_COMMENT:\r
+        case SCE_CAML_COMMENT1:\r
+        case SCE_CAML_COMMENT2:\r
+        case SCE_CAML_COMMENT3:\r
+            // we're IN a comment - does this start a NESTED comment?\r
+            if (sc.Match('(', '*'))\r
+                state2 = sc.state + 1, chToken = sc.currentPos,\r
+                    sc.ch = ' ',    // (make SURE "(*)" isn't seen as a closed comment)\r
+                    sc.Forward(), nesting++;\r
+            // [try to] interpret as [additional] comment char\r
+            else if (sc.Match(')') && sc.chPrev == '*') {\r
+                if (nesting)\r
+                    state2 = (sc.state & 0x0f) - 1, chToken = 0, nesting--;\r
+                else\r
+                    state2 = SCE_CAML_DEFAULT;\r
+                chColor++;\r
+            // enable "magic" (read-only) comment AS REQUIRED\r
+            } else if (useMagic && sc.currentPos - chToken == 4\r
+                && sc.Match('c') && sc.chPrev == 'r' && sc.GetRelative(-2) == '@')\r
+                sc.state |= 0x10;   // (switch to read-only comment style)\r
+            break;\r
+        }\r
+\r
+        // handle state change and char coloring as required\r
+        if (state2 >= 0)\r
+            styler.ColourTo(chColor, sc.state), sc.ChangeState(state2);\r
+        // move to next char UNLESS re-scanning current char\r
+        if (advance)\r
+            sc.Forward();\r
+    }\r
+\r
+    // do any required terminal char coloring (JIC)\r
+    sc.Complete();\r
+}\r
+\r
+#ifdef BUILD_AS_EXTERNAL_LEXER\r
+static\r
+#endif /* BUILD_AS_EXTERNAL_LEXER */\r
+// Folder stub: it does nothing, and exists so that callers have a folder\r
+// function to invoke or register.  All parameters are intentionally unused.\r
+void FoldCamlDoc(\r
+    unsigned int startPos, int length,\r
+    int initStyle,\r
+    WordList *keywordlists[],\r
+    Accessor &styler)\r
+{\r
+    // Void casts suppress the "not used" warnings without reading\r
+    // keywordlists[] or calling into styler at run time.\r
+    static_cast<void>(startPos);\r
+    static_cast<void>(length);\r
+    static_cast<void>(initStyle);\r
+    static_cast<void>(keywordlists);\r
+    static_cast<void>(styler);\r
+}\r
+\r
+// Descriptions of the keyword lists used by this lexer, ending with a null\r
+// entry.  Presumably in the same order as keywordlists[] in ColouriseCamlDoc\r
+// (keywords, keywords2, keywords3) - verify against LexerModule.\r
+static const char * const camlWordListDesc[] = {\r
+ "Keywords", // primary Objective Caml keywords\r
+ "Keywords2", // "optional" keywords (typically from Pervasives)\r
+ "Keywords3", // "optional" keywords (typically typenames)\r
+ 0\r
+};\r
+\r
+#ifndef BUILD_AS_EXTERNAL_LEXER\r
+// Built-in (non-external) builds define the LexerModule that ties SCLEX_CAML\r
+// and the name "caml" to the lexer, the folder and the word-list descriptions.\r
+LexerModule lmCaml(SCLEX_CAML, ColouriseCamlDoc, "caml", FoldCamlDoc, camlWordListDesc);\r
+#endif /* BUILD_AS_EXTERNAL_LEXER */\r