1 // Scintilla source code edit control
\r
2 /** @file LexCaml.cxx
\r
3 ** Lexer for Objective Caml.
\r
5 // Copyright 2005 by Robert Roessler <robertr@rftp.com>
\r
6 // The License.txt file describes the conditions under which this software may be distributed.
\r
8 20050204 Initial release.
\r
9 20050205 Quick compiler standards/"cleanliness" adjustment.
\r
10 20050206 Added cast for IsLeadByte().
\r
11 20050209 Changes to "external" build support.
\r
12 20050306 Fix for 1st-char-in-doc "corner" case.
\r
13 20050502 Fix for [harmless] one-past-the-end coloring.
\r
14 20050515 Refined numeric token recognition logic.
\r
15 20051125 Added 2nd "optional" keywords class.
\r
16 20051129 Support "magic" (read-only) comments for RCaml.
\r
17 20051204 Switch to using StyleContext infrastructure.
\r
26 #include "Platform.h"
\r
28 #include "PropSet.h"
\r
29 #include "Accessor.h"
\r
30 #include "StyleContext.h"
\r
31 #include "KeyWords.h"
\r
32 #include "Scintilla.h"
\r
33 #include "SciLexer.h"
\r
35 // Since the Microsoft __iscsym[f] funcs are not ANSI...
\r
// Identifier-body test: returns 1 when c is '_' or alphanumeric per isalnum(),
// otherwise 0.
// The <ctype.h> classifiers are undefined for arguments that are neither EOF
// nor representable as unsigned char, and the int character values handed in
// by callers are not range-checked beforehand, so anything outside 0..0xFF is
// rejected before isalnum() is consulted.
inline int iscaml(int c) {
	if (c == '_')
		return 1;
	return (c >= 0 && c <= 0xFF && isalnum(c)) ? 1 : 0;
}
\r
// Identifier-start test: returns 1 when c is '_' or alphabetic per isalpha(),
// otherwise 0.
// Values outside 0..0xFF are rejected up front because isalpha() is undefined
// for arguments that are neither EOF nor representable as unsigned char.
inline int iscamlf(int c) {
	if (c == '_')
		return 1;
	return (c >= 0 && c <= 0xFF && isalpha(c)) ? 1 : 0;
}
\r
// Numeric-literal char test: returns 1 when c is '_' or a decimal digit per
// isdigit(), otherwise 0.
// Values outside 0..0xFF are rejected up front because isdigit() is undefined
// for arguments that are neither EOF nor representable as unsigned char.
inline int iscamld(int c) {
	if (c == '_')
		return 1;
	return (c >= 0 && c <= 0xFF && isdigit(c)) ? 1 : 0;
}
\r
40 static const int baseT[24] = {
\r
41 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
\r
42 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
\r
45 #ifdef SCI_NAMESPACE
\r
46 using namespace Scintilla;
\r
49 #ifdef BUILD_AS_EXTERNAL_LEXER
\r
51 (actually seems to work!)
\r
53 #include "WindowAccessor.h"
\r
54 #include "ExternalLexer.h"
\r
57 #include <windows.h>
\r
60 static void ColouriseCamlDoc(
\r
61 unsigned int startPos, int length,
\r
63 WordList *keywordlists[],
\r
66 static void FoldCamlDoc(
\r
67 unsigned int startPos, int length,
\r
69 WordList *keywordlists[],
\r
72 static void InternalLexOrFold(int lexOrFold, unsigned int startPos, int length,
\r
73 int initStyle, char *words[], WindowID window, char *props);
\r
// Lexer name string; GetLexerName() copies as much of it as fits into the
// caller-supplied buffer.
75 static const char* LexerName = "caml";
\r
78 void Platform::DebugPrintf(const char *format, ...) {
\r
81 va_start(pArguments, format);
\r
82 vsprintf(buffer,format,pArguments);
\r
84 Platform::DebugDisplay(buffer);
\r
87 void Platform::DebugPrintf(const char *, ...) {
\r
91 bool Platform::IsDBCSLeadByte(int codePage, char ch) {
\r
92 return ::IsDBCSLeadByteEx(codePage, ch) != 0;
\r
95 long Platform::SendScintilla(WindowID w, unsigned int msg, unsigned long wParam, long lParam) {
\r
96 return ::SendMessage(reinterpret_cast<HWND>(w), msg, wParam, lParam);
\r
99 long Platform::SendScintillaPointer(WindowID w, unsigned int msg, unsigned long wParam, void *lParam) {
\r
100 return ::SendMessage(reinterpret_cast<HWND>(w), msg, wParam,
\r
101 reinterpret_cast<LPARAM>(lParam));
\r
104 void EXT_LEXER_DECL Fold(unsigned int lexer, unsigned int startPos, int length,
\r
105 int initStyle, char *words[], WindowID window, char *props)
\r
107 // below useless evaluation(s) to suppress "not used" warnings
\r
109 // build expected data structures and do the Fold
\r
110 InternalLexOrFold(1, startPos, length, initStyle, words, window, props);
\r
114 int EXT_LEXER_DECL GetLexerCount()
\r
116 return 1; // just us [Objective] Caml lexers here!
\r
119 void EXT_LEXER_DECL GetLexerName(unsigned int Index, char *name, int buflength)
\r
121 // below useless evaluation(s) to suppress "not used" warnings
\r
123 // return as much of our lexer name as will fit (what's up with Index?)
\r
124 if (buflength > 0) {
\r
126 int n = strlen(LexerName);
\r
129 memcpy(name, LexerName, n), name[n] = '\0';
\r
133 void EXT_LEXER_DECL Lex(unsigned int lexer, unsigned int startPos, int length,
\r
134 int initStyle, char *words[], WindowID window, char *props)
\r
136 // below useless evaluation(s) to suppress "not used" warnings
\r
138 // build expected data structures and do the Lex
\r
139 InternalLexOrFold(0, startPos, length, initStyle, words, window, props);
\r
142 static void InternalLexOrFold(int foldOrLex, unsigned int startPos, int length,
\r
143 int initStyle, char *words[], WindowID window, char *props)
\r
145 // create and initialize a WindowAccessor (including contained PropSet)
\r
147 ps.SetMultiple(props);
\r
148 WindowAccessor wa(window, ps);
\r
149 // create and initialize WordList(s)
\r
151 for (; words[nWL]; nWL++) ; // count # of WordList PTRs needed
\r
152 WordList** wl = new WordList* [nWL + 1];// alloc WordList PTRs
\r
154 for (; i < nWL; i++) {
\r
155 wl[i] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
\r
156 wl[i]->Set(words[i]);
\r
159 // call our "internal" folder/lexer (... then do Flush!)
\r
161 FoldCamlDoc(startPos, length, initStyle, wl, wa);
\r
163 ColouriseCamlDoc(startPos, length, initStyle, wl, wa);
\r
165 // clean up before leaving
\r
166 for (i = nWL - 1; i >= 0; i--)
\r
172 #endif /* BUILD_AS_EXTERNAL_LEXER */
\r
174 void ColouriseCamlDoc(
\r
175 unsigned int startPos, int length,
\r
177 WordList *keywordlists[],
\r
180 // initialize styler
\r
181 StyleContext sc(startPos, length, initStyle, styler);
\r
182 // set up [initial] state info (terminating states that shouldn't "bleed")
\r
184 if (sc.state < SCE_CAML_STRING)
\r
185 sc.state = SCE_CAML_DEFAULT;
\r
186 if (sc.state >= SCE_CAML_COMMENT)
\r
187 nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT;
\r
189 int chBase = 0, chToken = 0, chLit = 0;
\r
190 WordList& keywords = *keywordlists[0];
\r
191 WordList& keywords2 = *keywordlists[1];
\r
192 WordList& keywords3 = *keywordlists[2];
\r
193 const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0);
\r
195 // foreach char in range...
\r
196 while (sc.More()) {
\r
197 // set up [per-char] state info
\r
198 int state2 = -1; // (ASSUME no state change)
\r
199 int chColor = sc.currentPos - 1;// (ASSUME standard coloring range)
\r
200 bool advance = true; // (ASSUME scanner "eats" 1 char)
\r
202 // step state machine
\r
203 switch (sc.state & 0x0f) {
\r
204 case SCE_CAML_DEFAULT:
\r
205 chToken = sc.currentPos; // save [possible] token start (JIC)
\r
206 // it's wide open; what do we have?
\r
207 if (iscamlf(sc.ch))
\r
208 state2 = SCE_CAML_IDENTIFIER;
\r
209 else if (sc.Match('`') && iscamlf(sc.chNext))
\r
210 state2 = SCE_CAML_TAGNAME;
\r
211 else if (sc.Match('#') && isdigit(sc.chNext))
\r
212 state2 = SCE_CAML_LINENUM;
\r
213 else if (isdigit(sc.ch)) {
\r
214 state2 = SCE_CAML_NUMBER, chBase = 10;
\r
215 if (sc.Match('0') && strchr("bBoOxX", sc.chNext))
\r
216 chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward();
\r
217 } else if (sc.Match('\'')) /* (char literal?) */
\r
218 state2 = SCE_CAML_CHAR, chLit = 0;
\r
219 else if (sc.Match('\"'))
\r
220 state2 = SCE_CAML_STRING;
\r
221 else if (sc.Match('(', '*'))
\r
222 state2 = SCE_CAML_COMMENT,
\r
223 sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment)
\r
225 else if (strchr("!?~" /* Caml "prefix-symbol" */
\r
226 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
\r
227 "()[]{};,:.#", sc.ch)) /* Caml "bracket" or ;,:.# */
\r
228 state2 = SCE_CAML_OPERATOR;
\r
231 case SCE_CAML_IDENTIFIER:
\r
232 // [try to] interpret as [additional] identifier char
\r
233 if (!(iscaml(sc.ch) || sc.Match('\''))) {
\r
234 const int n = sc.currentPos - chToken;
\r
236 // length is believable as keyword, [re-]construct token
\r
238 for (int i = -n; i < 0; i++)
\r
239 t[n + i] = static_cast<char>(sc.GetRelative(i));
\r
241 // special-case "_" token as KEYWORD
\r
242 if ((n == 1 && sc.chPrev == '_') || keywords.InList(t))
\r
243 sc.ChangeState(SCE_CAML_KEYWORD);
\r
244 else if (keywords2.InList(t))
\r
245 sc.ChangeState(SCE_CAML_KEYWORD2);
\r
246 else if (keywords3.InList(t))
\r
247 sc.ChangeState(SCE_CAML_KEYWORD3);
\r
249 state2 = SCE_CAML_DEFAULT, advance = false;
\r
253 case SCE_CAML_TAGNAME:
\r
254 // [try to] interpret as [additional] tagname char
\r
255 if (!(iscaml(sc.ch) || sc.Match('\'')))
\r
256 state2 = SCE_CAML_DEFAULT, advance = false;
\r
259 /*case SCE_CAML_KEYWORD:
\r
260 case SCE_CAML_KEYWORD2:
\r
261 case SCE_CAML_KEYWORD3:
\r
262 // [try to] interpret as [additional] keyword char
\r
264 state2 = SCE_CAML_DEFAULT, advance = false;
\r
267 case SCE_CAML_LINENUM:
\r
268 // [try to] interpret as [additional] linenum directive char
\r
269 if (!isdigit(sc.ch))
\r
270 state2 = SCE_CAML_DEFAULT, advance = false;
\r
273 case SCE_CAML_OPERATOR: {
\r
274 // [try to] interpret as [additional] operator char
\r
276 if (iscaml(sc.ch) || isspace(sc.ch) /* ident or whitespace */
\r
277 || (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */
\r
278 || !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) {
\r
279 // check for INCLUSIVE termination
\r
280 if (o && strchr(")]};,", sc.ch)) {
\r
281 if ((sc.Match(')') && sc.chPrev == '(')
\r
282 || (sc.Match(']') && sc.chPrev == '['))
\r
283 // special-case "()" and "[]" tokens as KEYWORDS
\r
284 sc.ChangeState(SCE_CAML_KEYWORD);
\r
288 state2 = SCE_CAML_DEFAULT;
\r
293 case SCE_CAML_NUMBER:
\r
294 // [try to] interpret as [additional] numeric literal char
\r
295 // N.B. - improperly accepts "extra" digits in base 2 or 8 literals
\r
296 if (iscamld(sc.ch) || IsADigit(sc.ch, chBase))
\r
298 // how about an integer suffix?
\r
299 if ((sc.Match('l') || sc.Match('L') || sc.Match('n'))
\r
300 && (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase)))
\r
302 // or a floating-point literal?
\r
303 if (chBase == 10) {
\r
304 // with a decimal point?
\r
305 if (sc.Match('.') && iscamld(sc.chPrev))
\r
307 // with an exponent? (I)
\r
308 if ((sc.Match('e') || sc.Match('E'))
\r
309 && (iscamld(sc.chPrev) || sc.chPrev == '.'))
\r
311 // with an exponent? (II)
\r
312 if ((sc.Match('+') || sc.Match('-'))
\r
313 && (sc.chPrev == 'e' || sc.chPrev == 'E'))
\r
316 // it looks like we have run out of number
\r
317 state2 = SCE_CAML_DEFAULT, advance = false;
\r
320 case SCE_CAML_CHAR:
\r
321 // [try to] interpret as [additional] char literal char
\r
322 if (sc.Match('\\')) {
\r
323 chLit = 1; // (definitely IS a char literal)
\r
324 if (sc.chPrev == '\\')
\r
325 sc.ch = ' '; // (so termination test isn't fooled)
\r
326 // should we be terminating - one way or another?
\r
327 } else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) {
\r
328 state2 = SCE_CAML_DEFAULT;
\r
329 if (sc.Match('\''))
\r
332 sc.ChangeState(SCE_CAML_IDENTIFIER);
\r
333 // ... maybe a char literal, maybe not
\r
334 } else if (chLit < 1 && sc.currentPos - chToken >= 2)
\r
335 sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false;
\r
338 case SCE_CAML_STRING:
\r
339 // [try to] interpret as [additional] string literal char
\r
340 if (sc.Match('\\') && sc.chPrev == '\\')
\r
341 sc.ch = ' '; // (so '\\' doesn't cause us trouble)
\r
342 else if (sc.Match('\"') && sc.chPrev != '\\')
\r
343 state2 = SCE_CAML_DEFAULT, chColor++;
\r
346 case SCE_CAML_COMMENT:
\r
347 case SCE_CAML_COMMENT1:
\r
348 case SCE_CAML_COMMENT2:
\r
349 case SCE_CAML_COMMENT3:
\r
350 // we're IN a comment - does this start a NESTED comment?
\r
351 if (sc.Match('(', '*'))
\r
352 state2 = sc.state + 1, chToken = sc.currentPos,
\r
353 sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment)
\r
354 sc.Forward(), nesting++;
\r
355 // [try to] interpret as [additional] comment char
\r
356 else if (sc.Match(')') && sc.chPrev == '*') {
\r
358 state2 = (sc.state & 0x0f) - 1, chToken = 0, nesting--;
\r
360 state2 = SCE_CAML_DEFAULT;
\r
362 // enable "magic" (read-only) comment AS REQUIRED
\r
363 } else if (useMagic && sc.currentPos - chToken == 4
\r
364 && sc.Match('c') && sc.chPrev == 'r' && sc.GetRelative(-2) == '@')
\r
365 sc.state |= 0x10; // (switch to read-only comment style)
\r
369 // handle state change and char coloring as required
\r
371 styler.ColourTo(chColor, sc.state), sc.ChangeState(state2);
\r
372 // move to next char UNLESS re-scanning current char
\r
377 // do any required terminal char coloring (JIC)
\r
381 #ifdef BUILD_AS_EXTERNAL_LEXER
\r
383 #endif /* BUILD_AS_EXTERNAL_LEXER */
\r
385 unsigned int startPos, int length,
\r
387 WordList *keywordlists[],
\r
390 // below useless evaluation(s) to suppress "not used" warnings
\r
391 startPos || length || initStyle || keywordlists[0] || styler.Length();
\r
394 static const char * const camlWordListDesc[] = {
\r
395 "Keywords", // primary Objective Caml keywords
\r
396 "Keywords2", // "optional" keywords (typically from Pervasives)
\r
397 "Keywords3", // "optional" keywords (typically typenames)
\r
401 #ifndef BUILD_AS_EXTERNAL_LEXER
\r
// Module object for the built-in (non-external) build: hands the lexer id
// SCLEX_CAML, the ColouriseCamlDoc and FoldCamlDoc routines, the name "caml"
// and the camlWordListDesc table to the LexerModule constructor.
402 LexerModule lmCaml(SCLEX_CAML, ColouriseCamlDoc, "caml", FoldCamlDoc, camlWordListDesc);
\r
403 #endif /* BUILD_AS_EXTERNAL_LEXER */
\r