From ae5e83ea02cddf37b2d197faa17a14eacfa0ea6f Mon Sep 17 00:00:00 2001 From: Chris Spencer Date: Thu, 19 May 2011 02:36:00 -0700 Subject: [PATCH] Support for matching groups in tokenizer with arrays of tokens. --- lib/ace/tokenizer.js | 62 +++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 105c14c9..12e9c030 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -41,13 +41,24 @@ var Tokenizer = function(rules) { this.rules = rules; this.regExps = {}; + this.matchMappings = {}; for ( var key in this.rules) { var rule = this.rules[key]; var state = rule; var ruleRegExps = []; - - for ( var i = 0; i < state.length; i++) + var matchTotal = 0; + var mapping = this.matchMappings[key] = {}; + + for ( var i = 0; i < state.length; i++) { + var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a"); + mapping[matchTotal] = { + rule: i, + len: matchcount.length - 2 + }; + matchTotal += matchcount.length - 2; + ruleRegExps.push(state[i].regex); + } this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g"); @@ -59,25 +70,30 @@ var Tokenizer = function(rules) { this.getLineTokens = function(line, startState) { var currentState = startState; var state = this.rules[currentState]; + var mapping = this.matchMappings[currentState]; var re = this.regExps[currentState]; re.lastIndex = 0; - + var match, tokens = []; - + var lastIndex = 0; - + var token = { type: null, value: "" }; - + while (match = re.exec(line)) { var type = "text"; - var value = match[0]; + var value = [match[0]]; for ( var i = 0; i < state.length; i++) { if (match[i + 1] !== undefined) { - var rule = state[i]; + var rule = state[mapping[i].rule]; + + if (mapping[i].len > 1) { + value = match.slice(i+2, i+1+mapping[i].len); + } if (typeof rule.token == "function") type = rule.token(match[0]); @@ -87,6 +103,7 @@ var Tokenizer = function(rules) { if (rule.next && rule.next !== currentState) { currentState = rule.next; state = this.rules[currentState]; + mapping = this.matchMappings[currentState]; lastIndex = re.lastIndex; re = this.regExps[currentState]; @@ -96,17 +113,26 @@ var Tokenizer = function(rules) { } }; - - if (token.type !== type) { - if (token.type) - tokens.push(token); + if (typeof type == "string") { + if (typeof value != "string") { + value = [value.join("")]; + } + type = [type]; + } + + for ( var i = 0; i < value.length; i++) { + if (token.type !== type[i]) { + if (token.type) { + tokens.push(token); + } - token = { - type: type, - value: value - }; - } else { - token.value += value; + token = { + type: type[i], + value: value[i] + } + } else { + token.value += value; + } } if (lastIndex == line.length)