diff --git a/BackgroundTokenizer.js b/BackgroundTokenizer.js index 91d89aec..8b3d73b3 100644 --- a/BackgroundTokenizer.js +++ b/BackgroundTokenizer.js @@ -71,7 +71,11 @@ BackgroundTokenizer.prototype.getTokens = function(row) if (this.lines[row]) { return this.lines[row].tokens; } else { - return getLineTokens(this.textLines[row] || "", "start").tokens; + var state = "start"; + if (row > 0 && this.lines[row-1]) { + state = this.lines[row-1].state; + } + return getLineTokens(this.textLines[row] || "", state).tokens; } }; @@ -101,14 +105,98 @@ var keywords = { "with" : 1 }; -getLineTokens = function(line, state) +getLineTokens = function(line, startState) { - var tokens = []; + var rules = { + start : + [ + { + token: "comment", + regex: "\\/\\/.*$" + }, + { + token: "comment", // multi line comment in one line + regex: "\\/\\*.*?\\*\\/" + }, + { + token: "comment", // multi line comment in several lines + regex: "\\/\\*.*$", + next: "comment" + }, + { + token: "string", // single line + regex: '["][^"]*["]' + }, + { + token: "string", // single line + regex: "['][^']*[']" + }, + { + token: "number", // hex + regex: "0[xX][0-9a-fA-F]+\\b" + }, + { + token: "number", // float + regex: "[+-]?\\d+(?:(?:\\.\\d*)?(?:[eE][+-]?\\d+)?)?\\b" + }, + { + token: function(value) + { + if (keywords[value]) { + return "keyword"; + } else { + return "identifier" + } + }, + regex: "[a-zA-Z_][a-zA-Z0-9_]*\\b" + }, + { + token: function(value) { + //return parens[value]; + return "text"; + }, + regex: "[\\[\\]\\(\\)\\{\\}]" + }, + { + token: "text", + regex: "\\s+" + } + ], + "comment": + [ + { + token: "comment", // closing comment + regex: ".*?\\*\\/", + next: "start" + }, + { + token: "comment", // comment spanning whole line + regex: ".+" + } + ] + }; - var re = /(?:(\s+)|("[^"]*")|('[^']*')|([\[\]\(\)\{\}])|([a-zA-Z_][a-zA-Z0-9_]*)|(\/\/.*)|(.))/g - re.lastIndex = 0; + var regExps = {}; + for (var key in rules) + { + var state = rules[key]; + var ruleRegExps = []; + + for (var i=0; i < state.length; i++) { + ruleRegExps.push(state[i].regex); + }; + + regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g"); + } - var match; + + var currentState = startState; + var state = rules[currentState]; + var re = regExps[currentState]; + re.lastIndex = 0; + + var match, tokens = []; + while (match = re.exec(line)) { var token = { @@ -116,19 +204,38 @@ getLineTokens = function(line, state) value: match[0] } - if (match[2] || match[3]) { - token.type = "string"; - } else if (match[5] && keywords[match[5]]) { - token.type = "keyword"; - } else if (match[6]) { - token.type = "comment"; - } + //console.log(match); + + for (var i=0; i < state.length; i++) + { + if (match[i+1]) + { + if (typeof state[i].token == "function") { + token.type = state[i].token(match[0]); + } else { + token.type = state[i].token; + } + + if (state[i].next && state[i].next !== currentState) + { + currentState = state[i].next; + var state = rules[currentState]; + var lastIndex = re.lastIndex; + + var re = regExps[currentState]; + re.lastIndex = lastIndex; + } + break; + } + }; tokens.push(token); }; + //console.log(tokens, currentState) + return { tokens: tokens, - state: "start" + state: currentState } }; \ No newline at end of file diff --git a/Editor.js b/Editor.js index 32e97451..fe8f7ff9 100644 --- a/Editor.js +++ b/Editor.js @@ -158,7 +158,6 @@ function Editor(doc, renderer) }, onTokenizerUpdate : function(startRow, endRow) { - console.log("token update", startRow, endRow); this.renderer.updateLines(startRow, endRow); }, diff --git a/editor.css b/editor.css index f101ee35..0b6f789f 100644 --- a/editor.css +++ b/editor.css @@ -81,6 +81,10 @@ color: rgb(0, 102, 255); } +.line .number { + color: rgb(0, 0, 205); +} + .marker-layer .selection { position: absolute; background: rgba(77, 151, 255, 0.33); diff --git a/experiments/tokenizer.html b/experiments/tokenizer.html index adcac8f2..c2f40f49 100644 --- a/experiments/tokenizer.html +++ b/experiments/tokenizer.html @@ -22,6 +22,7 @@ button.onclick = function() { var onComplete = function() { console.log("complete"); + console.log(tokenizer.lines); }; var onUpdate = function(firstLine, lastLine) {