1 // Scintilla source code edit control
\r
2 /** @file LexBash.cxx
\r
5 // Copyright 2004-2008 by Neil Hodgson <neilh@scintilla.org>
\r
6 // Adapted from LexPerl by Kein-Hong Man 2004
\r
7 // The License.txt file describes the conditions under which this software may be distributed.
\r
15 #include "Platform.h"
\r
17 #include "PropSet.h"
\r
18 #include "Accessor.h"
\r
19 #include "StyleContext.h"
\r
20 #include "KeyWords.h"
\r
21 #include "Scintilla.h"
\r
22 #include "SciLexer.h"
\r
23 #include "CharacterSet.h"
\r
25 #ifdef SCI_NAMESPACE
\r
26 using namespace Scintilla;
\r
// Capacity, in chars, of the buffer that stores a here-document delimiter
// (HereDocCls::Delimiter) and of the scratch buffer a line is read into
// before it is compared with that delimiter.
29 #define HERE_DELIM_MAX 256
\r
31 // define this if you want 'invalid octals' to be marked as errors
\r
32 // usually, this is not a good idea, permissive lexing is better
\r
33 #undef PEDANTIC_OCTAL
\r
// Codes stored in numBase while a number is being lexed. numBase also holds
// the numeric base of a DD#NNN constant; getBashNumberBase rejects bases
// above 64, and every code below is greater than 64.
35 #define BASH_BASE_ERROR 65
\r
36 #define BASH_BASE_DECIMAL 66
\r
37 #define BASH_BASE_HEX 67
\r
// The octal codes exist only when PEDANTIC_OCTAL is defined.
38 #ifdef PEDANTIC_OCTAL
\r
39 #define BASH_BASE_OCTAL 68
\r
40 #define BASH_BASE_OCTAL_ERROR 69
\r
43 static inline int translateBashDigit(int ch) {
\r
44 if (ch >= '0' && ch <= '9') {
\r
46 } else if (ch >= 'a' && ch <= 'z') {
\r
47 return ch - 'a' + 10;
\r
48 } else if (ch >= 'A' && ch <= 'Z') {
\r
49 return ch - 'A' + 36;
\r
50 } else if (ch == '@') {
\r
52 } else if (ch == '_') {
\r
55 return BASH_BASE_ERROR;
\r
58 static inline int getBashNumberBase(char *s) {
\r
62 base = base * 10 + (*s++ - '0');
\r
65 if (base > 64 || i > 2) {
\r
66 return BASH_BASE_ERROR;
\r
// Return the closing delimiter that pairs with an opening delimiter.
// The four bracket characters map to their mirror image; any other
// character (for example a quote) is its own closing delimiter and is
// returned unchanged.
static int opposite(int ch) {
	if (ch == '(') return ')';
	if (ch == '[') return ']';
	if (ch == '{') return '}';
	if (ch == '<') return '>';
	return ch;
}
\r
// Lexer entry point for Bash. Styles the text from startPos up to
// startPos + length, starting in style initStyle and using styler to read
// the document and existing styles. keywordlists[0] is the keyword list
// consulted when a word ends.
// NOTE(review): a number of statements of this function are not visible in
// this excerpt; the comments added below describe the visible lines only.
79 static void ColouriseBashDoc(unsigned int startPos, int length, int initStyle,
\r
80 WordList *keywordlists[], Accessor &styler) {
\r
82 WordList &keywords = *keywordlists[0];
\r
// Character classes tested by the state machine below.
84 CharacterSet setWordStart(CharacterSet::setAlpha, "_");
\r
85 // note that [+-] are often parts of identifiers in shell scripts
\r
86 CharacterSet setWord(CharacterSet::setAlphaNum, "._+-");
\r
87 CharacterSet setBashOperator(CharacterSet::setNone, "^&\\%()-+=|{}[]:;>,*/<?!.~@");
\r
88 CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMACahGLNn");
\r
89 CharacterSet setParam(CharacterSet::setAlphaNum, "$_");
\r
90 CharacterSet setHereDoc(CharacterSet::setAlpha, "_\\-+!");
\r
91 CharacterSet setHereDoc2(CharacterSet::setAlphaNum, "_-+!");
\r
92 CharacterSet setLeftShift(CharacterSet::setDigits, "=$");
\r
94 class HereDocCls { // Class to manage HERE document elements
\r
96 int State; // 0: '<<' encountered
\r
97 // 1: collect the delimiter
\r
98 // 2: here doc text (lines after the delimiter)
\r
99 int Quote; // the char after '<<'
\r
100 bool Quoted; // true if Quote in ('\'','"','`')
\r
101 bool Indent; // indented delimiter (for <<-)
\r
102 int DelimiterLength; // strlen(Delimiter)
\r
103 char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
\r
109 DelimiterLength = 0;
\r
110 Delimiter = new char[HERE_DELIM_MAX];
\r
111 Delimiter[0] = '\0';
\r
// Append stores ch and re-terminates the string; it does not check the
// buffer bound itself.
113 void Append(int ch) {
\r
114 Delimiter[DelimiterLength++] = static_cast<char>(ch);
\r
115 Delimiter[DelimiterLength] = '\0';
\r
118 delete []Delimiter;
\r
121 HereDocCls HereDoc;
\r
123 class QuoteCls { // Class to manage quote pairs (simplified vs LexPerl)
\r
135 Down = opposite(Up);
\r
137 void Start(int u) {
\r
146 unsigned int endPos = startPos + length;
\r
148 // Backtrack to beginning of style if required...
\r
149 // If in a long distance lexical state, backtrack to find quote characters
\r
150 if (initStyle == SCE_SH_HERE_Q) {
\r
151 while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_SH_HERE_DELIM)) {
\r
// Restart from the first position of the line that holds startPos and take
// the style of the character just before it as the initial style.
154 startPos = styler.LineStart(styler.GetLine(startPos));
\r
155 initStyle = styler.StyleAt(startPos - 1);
\r
157 // Bash strings can be multi-line with embedded newlines, so backtrack.
\r
158 // Bash numbers have additional state during lexing, so backtrack too.
\r
159 if (initStyle == SCE_SH_STRING
\r
160 || initStyle == SCE_SH_BACKTICKS
\r
161 || initStyle == SCE_SH_CHARACTER
\r
162 || initStyle == SCE_SH_NUMBER
\r
163 || initStyle == SCE_SH_IDENTIFIER
\r
164 || initStyle == SCE_SH_COMMENTLINE) {
\r
165 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
\r
168 initStyle = SCE_SH_DEFAULT;
\r
// sc covers the (possibly widened) range. Each loop iteration first decides
// whether the current style ends and then whether a new one begins.
171 StyleContext sc(startPos, endPos - startPos, initStyle, styler);
\r
173 for (; sc.More(); sc.Forward()) {
\r
175 // Determine if the current state should terminate.
\r
176 switch (sc.state) {
\r
// Operator style ends at the next character.
177 case SCE_SH_OPERATOR:
\r
178 sc.SetState(SCE_SH_DEFAULT);
\r
// NOTE(review): the case label for the branch below is not visible in this
// excerpt. The visible code restyles a finished word as an identifier unless
// it starts with - or is found in the keyword list.
181 // "." never used in Bash variable names but used in file names
\r
182 if (!setWord.Contains(sc.ch)) {
\r
184 sc.GetCurrent(s, sizeof(s));
\r
185 if (s[0] != '-' && // for file operators
\r
186 !keywords.InList(s)) {
\r
187 sc.ChangeState(SCE_SH_IDENTIFIER);
\r
189 sc.SetState(SCE_SH_DEFAULT);
\r
192 case SCE_SH_IDENTIFIER:
\r
193 if (sc.chPrev == '\\') { // for escaped chars
\r
194 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
195 } else if (!setWord.Contains(sc.ch)) {
\r
196 sc.SetState(SCE_SH_DEFAULT);
\r
199 case SCE_SH_NUMBER:
\r
// digit is the value of the current character as a Bash digit, or
// BASH_BASE_ERROR when it is not one.
200 digit = translateBashDigit(sc.ch);
\r
201 if (numBase == BASH_BASE_DECIMAL) {
\r
// A # met while in decimal mode means the text lexed so far is a base
// prefix; it is converted with getBashNumberBase.
202 if (sc.ch == '#') {
\r
204 sc.GetCurrent(s, sizeof(s));
\r
205 numBase = getBashNumberBase(s);
\r
206 if (numBase != BASH_BASE_ERROR)
\r
208 } else if (IsADigit(sc.ch))
\r
210 } else if (numBase == BASH_BASE_HEX) {
\r
211 if (IsADigit(sc.ch, 16))
\r
213 #ifdef PEDANTIC_OCTAL
\r
214 } else if (numBase == BASH_BASE_OCTAL ||
\r
215 numBase == BASH_BASE_OCTAL_ERROR) {
\r
219 numBase = BASH_BASE_OCTAL_ERROR;
\r
223 } else if (numBase == BASH_BASE_ERROR) {
\r
226 } else { // DD#DDDD number style handling
\r
227 if (digit != BASH_BASE_ERROR) {
\r
228 if (numBase <= 36) {
\r
229 // case-insensitive if base<=36
\r
230 if (digit >= 36) digit -= 26;
\r
232 if (digit < numBase)
\r
235 numBase = BASH_BASE_ERROR;
\r
240 // fallthrough when number is at an end or error
\r
241 if (numBase == BASH_BASE_ERROR
\r
242 #ifdef PEDANTIC_OCTAL
\r
243 || numBase == BASH_BASE_OCTAL_ERROR
\r
246 sc.ChangeState(SCE_SH_ERROR);
\r
248 sc.SetState(SCE_SH_DEFAULT);
\r
250 case SCE_SH_COMMENTLINE:
\r
251 if (sc.ch == '\\' && (sc.chNext == '\r' || sc.chNext == '\n')) {
\r
252 // comment continuation
\r
254 if (sc.ch == '\r' && sc.chNext == '\n') {
\r
257 } else if (sc.atLineEnd) {
\r
258 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
261 case SCE_SH_HERE_DELIM:
\r
264 // Specifier format is: <<[-]WORD
\r
265 // Optional '-' is for removal of leading tabs from here-doc.
\r
266 // Whitespace acceptable after <<[-] operator
\r
268 if (HereDoc.State == 0) { // '<<' encountered
\r
269 HereDoc.Quote = sc.chNext;
\r
270 HereDoc.Quoted = false;
\r
271 HereDoc.DelimiterLength = 0;
\r
272 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
\r
273 if (sc.chNext == '\'' || sc.chNext == '\"') { // a quoted here-doc delimiter (' or ")
\r
275 HereDoc.Quoted = true;
\r
277 } else if (!HereDoc.Indent && sc.chNext == '-') { // <<- indent case
\r
278 HereDoc.Indent = true;
\r
279 } else if (setHereDoc.Contains(sc.chNext)) {
\r
280 // an unquoted here-doc delimiter, no special handling
\r
281 // TODO check what exactly bash considers part of the delim
\r
283 } else if (sc.chNext == '<') { // HERE string <<<
\r
285 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
286 } else if (IsASpace(sc.chNext)) {
\r
288 } else if (setLeftShift.Contains(sc.chNext)) {
\r
289 // left shift << or <<= operator cases
\r
290 sc.ChangeState(SCE_SH_OPERATOR);
\r
291 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
293 // symbols terminates; deprecated zero-length delimiter
\r
296 } else if (HereDoc.State == 1) { // collect the delimiter
\r
297 if (HereDoc.Quoted) { // a quoted here-doc delimiter
\r
298 if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
\r
299 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
301 if (sc.ch == '\\' && sc.chNext == HereDoc.Quote) { // escaped quote
\r
304 HereDoc.Append(sc.ch);
\r
306 } else { // an unquoted here-doc delimiter
\r
307 if (setHereDoc2.Contains(sc.ch)) {
\r
308 HereDoc.Append(sc.ch);
\r
309 } else if (sc.ch == '\\') {
\r
310 // skip escape prefix
\r
312 sc.SetState(SCE_SH_DEFAULT);
\r
// Guard for the delimiter buffer: once only the terminator slot is left the
// delimiter is styled as an error.
315 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) { // force blowup
\r
316 sc.SetState(SCE_SH_ERROR);
\r
321 case SCE_SH_HERE_Q:
\r
322 // HereDoc.State == 2
\r
323 if (sc.atLineStart) {
\r
324 sc.SetState(SCE_SH_HERE_Q);
\r
326 while (IsASpace(sc.ch) && !sc.atLineEnd) { // whitespace prefix
\r
331 sc.SetState(SCE_SH_HERE_Q);
\r
332 while (!sc.atLineEnd) {
\r
// The here-doc ends when the text fetched by GetCurrent equals the stored
// delimiter and the indentation rule below is satisfied.
335 char s[HERE_DELIM_MAX];
\r
336 sc.GetCurrent(s, sizeof(s));
\r
337 if (strcmp(HereDoc.Delimiter, s) == 0) {
\r
338 if ((prefixws > 0 && HereDoc.Indent) || // indentation rule
\r
339 (prefixws == 0 && !HereDoc.Indent)) {
\r
340 sc.SetState(SCE_SH_DEFAULT);
\r
346 case SCE_SH_SCALAR: // variable names
\r
347 if (!setParam.Contains(sc.ch)) {
\r
348 if (sc.LengthCurrent() == 1) {
\r
349 // Special variable: $(, $_ etc.
\r
350 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
352 sc.SetState(SCE_SH_DEFAULT);
\r
356 case SCE_SH_STRING: // delimited styles
\r
357 case SCE_SH_CHARACTER:
\r
358 case SCE_SH_BACKTICKS:
\r
360 if (sc.ch == '\\' && Quote.Up != '\\') {
\r
362 } else if (sc.ch == Quote.Down) {
\r
364 if (Quote.Count == 0) {
\r
365 sc.ForwardSetState(SCE_SH_DEFAULT);
\r
367 } else if (sc.ch == Quote.Up) {
\r
373 // Must check end of HereDoc state 1 before default state is handled
\r
374 if (HereDoc.State == 1 && sc.atLineEnd) {
\r
375 // Begin of here-doc (the line after the here-doc delimiter):
\r
376 // Lexically, the here-doc starts from the next line after the >>, but the
\r
377 // first line of here-doc seem to follow the style of the last EOL sequence
\r
379 if (HereDoc.Quoted) {
\r
380 if (sc.state == SCE_SH_HERE_DELIM) {
\r
381 // Missing quote at end of string! We are stricter than bash.
\r
382 // Colour here-doc anyway while marking this bit as an error.
\r
383 sc.ChangeState(SCE_SH_ERROR);
\r
385 // HereDoc.Quote always == '\''
\r
387 sc.SetState(SCE_SH_HERE_Q);
\r
390 // Determine if a new state should be entered.
\r
391 if (sc.state == SCE_SH_DEFAULT) {
\r
392 if (sc.ch == '\\') { // escaped character
\r
393 sc.SetState(SCE_SH_IDENTIFIER);
\r
394 } else if (IsADigit(sc.ch)) {
\r
395 sc.SetState(SCE_SH_NUMBER);
\r
396 numBase = BASH_BASE_DECIMAL;
\r
397 if (sc.ch == '0') { // hex,octal
\r
398 if (sc.chNext == 'x' || sc.chNext == 'X') {
\r
399 numBase = BASH_BASE_HEX;
\r
401 } else if (IsADigit(sc.chNext)) {
\r
402 #ifdef PEDANTIC_OCTAL
\r
403 numBase = BASH_BASE_OCTAL;
\r
405 numBase = BASH_BASE_HEX;
\r
409 } else if (setWordStart.Contains(sc.ch)) {
\r
410 sc.SetState(SCE_SH_WORD);
\r
411 } else if (sc.ch == '#') {
\r
412 sc.SetState(SCE_SH_COMMENTLINE);
\r
413 } else if (sc.ch == '\"') {
\r
414 sc.SetState(SCE_SH_STRING);
\r
415 Quote.Start(sc.ch);
\r
416 } else if (sc.ch == '\'') {
\r
417 sc.SetState(SCE_SH_CHARACTER);
\r
418 Quote.Start(sc.ch);
\r
419 } else if (sc.ch == '`') {
\r
420 sc.SetState(SCE_SH_BACKTICKS);
\r
421 Quote.Start(sc.ch);
\r
422 } else if (sc.ch == '$') {
\r
423 sc.SetState(SCE_SH_SCALAR);
\r
425 if (sc.ch == '{') {
\r
426 sc.ChangeState(SCE_SH_PARAM);
\r
427 } else if (sc.ch == '\'') {
\r
428 sc.ChangeState(SCE_SH_CHARACTER);
\r
429 } else if (sc.ch == '"') {
\r
430 sc.ChangeState(SCE_SH_STRING);
\r
431 } else if (sc.ch == '(' || sc.ch == '`') {
\r
432 sc.ChangeState(SCE_SH_BACKTICKS);
\r
433 if (sc.chNext == '(') { // $(( is lexed as operator
\r
434 sc.ChangeState(SCE_SH_OPERATOR);
\r
437 continue; // scalar has no delimiter pair
\r
439 // fallthrough, open delim for $[{'"(`]
\r
440 Quote.Start(sc.ch);
\r
441 } else if (sc.Match('<', '<')) {
\r
442 sc.SetState(SCE_SH_HERE_DELIM);
\r
444 HereDoc.Indent = false;
\r
445 } else if (sc.ch == '-' && // one-char file test operators
\r
446 setSingleCharOp.Contains(sc.chNext) &&
\r
447 !setWord.Contains(sc.GetRelative(2)) &&
\r
448 IsASpace(sc.chPrev)) {
\r
449 sc.SetState(SCE_SH_WORD);
\r
451 } else if (setBashOperator.Contains(sc.ch)) {
\r
452 sc.SetState(SCE_SH_OPERATOR);
\r
459 static bool IsCommentLine(int line, Accessor &styler) {
\r
460 int pos = styler.LineStart(line);
\r
461 int eol_pos = styler.LineStart(line + 1) - 1;
\r
462 for (int i = pos; i < eol_pos; i++) {
\r
463 char ch = styler[i];
\r
466 else if (ch != ' ' && ch != '\t')
\r
// Folding entry point for Bash. Walks the characters from startPos up to
// startPos + length and records fold levels through styler.SetLevel.
// The third and fourth parameters (initial style and word lists) are unused.
// Reads the integer properties fold.comment and fold.compact; fold.compact
// is read with a default of 1.
// NOTE(review): several statements of this function are not visible in this
// excerpt; the comments added below describe the visible lines only.
472 static void FoldBashDoc(unsigned int startPos, int length, int, WordList *[],
\r
473 Accessor &styler) {
\r
474 bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
\r
475 bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
\r
476 unsigned int endPos = startPos + length;
\r
477 int visibleChars = 0;
\r
478 int lineCurrent = styler.GetLine(startPos);
\r
// Start from the level already stored for the first line, flags masked off.
479 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
\r
480 int levelCurrent = levelPrev;
\r
481 char chNext = styler[startPos];
\r
482 int styleNext = styler.StyleAt(startPos);
\r
483 for (unsigned int i = startPos; i < endPos; i++) {
\r
485 chNext = styler.SafeGetCharAt(i + 1);
\r
486 int style = styleNext;
\r
487 styleNext = styler.StyleAt(i + 1);
\r
// End of line is an LF or a CR that is not followed by LF, so a CR LF pair
// is counted once, on the LF.
488 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
\r
// Comment folding compares the neighbouring lines: a comment line with no
// comment before it and one after it is tested first, then the mirrored case.
// NOTE(review): the statements executed in each case are not visible here.
490 if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
\r
492 if (!IsCommentLine(lineCurrent - 1, styler)
\r
493 && IsCommentLine(lineCurrent + 1, styler))
\r
495 else if (IsCommentLine(lineCurrent - 1, styler)
\r
496 && !IsCommentLine(lineCurrent + 1, styler))
\r
// Only characters styled as operators are considered for brace folding.
499 if (style == SCE_SH_OPERATOR) {
\r
502 } else if (ch == '}') {
\r
// lev is the value stored for the current line: the level at the start of
// the line, plus the white flag when the line had no visible characters and
// compact folding is on, plus the header flag when the level rose on a line
// that had visible characters.
507 int lev = levelPrev;
\r
508 if (visibleChars == 0 && foldCompact)
\r
509 lev |= SC_FOLDLEVELWHITEFLAG;
\r
510 if ((levelCurrent > levelPrev) && (visibleChars > 0))
\r
511 lev |= SC_FOLDLEVELHEADERFLAG;
\r
// Write the level only when it differs from the stored one.
512 if (lev != styler.LevelAt(lineCurrent)) {
\r
513 styler.SetLevel(lineCurrent, lev);
\r
516 levelPrev = levelCurrent;
\r
519 if (!isspacechar(ch))
\r
522 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
\r
523 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
\r
524 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
\r
// Array of word list description strings handed to the lexer module below.
// NOTE(review): the entries of this array are not visible in this excerpt.
527 static const char * const bashWordListDesc[] = {
\r
// Lexer module object for Bash: built from the SCLEX_BASH id, the colouring
// function, the language name, the folding function and the word list
// descriptions.
532 LexerModule lmBash(SCLEX_BASH, ColouriseBashDoc, "bash", FoldBashDoc, bashWordListDesc);
\r