From ae5e83ea02cddf37b2d197faa17a14eacfa0ea6f Mon Sep 17 00:00:00 2001 From: Chris Spencer Date: Thu, 19 May 2011 02:36:00 -0700 Subject: [PATCH 1/7] Support for matching groups in tokenizer with arrays of tokens. --- lib/ace/tokenizer.js | 62 +++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 105c14c9..12e9c030 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -41,13 +41,24 @@ var Tokenizer = function(rules) { this.rules = rules; this.regExps = {}; + this.matchMappings = {}; for ( var key in this.rules) { var rule = this.rules[key]; var state = rule; var ruleRegExps = []; - - for ( var i = 0; i < state.length; i++) + var matchTotal = 0; + var mapping = this.matchMappings[key] = {}; + + for ( var i = 0; i < state.length; i++) { + var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a"); + mapping[matchTotal] = { + rule: i, + len: matchcount.length - 2 + }; + matchTotal += matchcount.length - 2; + ruleRegExps.push(state[i].regex); + } this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g"); @@ -59,25 +70,30 @@ var Tokenizer = function(rules) { this.getLineTokens = function(line, startState) { var currentState = startState; var state = this.rules[currentState]; + var mapping = this.matchMappings[currentState]; var re = this.regExps[currentState]; re.lastIndex = 0; - + var match, tokens = []; - + var lastIndex = 0; - + var token = { type: null, value: "" }; - + while (match = re.exec(line)) { var type = "text"; - var value = match[0]; + var value = [match[0]]; for ( var i = 0; i < state.length; i++) { if (match[i + 1] !== undefined) { - var rule = state[i]; + var rule = state[mapping[i].rule]; + + if (mapping[i].len > 1) { + value = match.slice(i+2, i+1+mapping[i].len); + } if (typeof rule.token == "function") type = rule.token(match[0]); @@ -87,6 +103,7 @@ var Tokenizer = function(rules) { if (rule.next && rule.next !== currentState) { currentState = rule.next; state = this.rules[currentState]; + mapping = this.matchMappings[currentState]; lastIndex = re.lastIndex; re = this.regExps[currentState]; @@ -96,17 +113,26 @@ var Tokenizer = function(rules) { } }; - - if (token.type !== type) { - if (token.type) - tokens.push(token); + if (typeof type == "string") { + if (typeof value != "string") { + value = [value.join("")]; + } + type = [type]; + } + + for ( var i = 0; i < value.length; i++) { + if (token.type !== type[i]) { + if (token.type) { + tokens.push(token); + } - token = { - type: type, - value: value - }; - } else { - token.value += value; + token = { + type: type[i], + value: value[i] + } + } else { + token.value += value; + } } if (lastIndex == line.length) From 0ebff3b5c9798470a15e2ddd7c0428396be276a8 Mon Sep 17 00:00:00 2001 From: Chris Spencer Date: Thu, 19 May 2011 03:02:08 -0700 Subject: [PATCH 2/7] Bugfix for consecutively typed tokens. --- lib/ace/tokenizer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 12e9c030..627d3841 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -131,7 +131,7 @@ var Tokenizer = function(rules) { value: value[i] } } else { - token.value += value; + token.value += value[i]; } } From c2659f24fed7670a55ca12c0ffd68ed737819d2e Mon Sep 17 00:00:00 2001 From: c-spencer Date: Thu, 19 May 2011 12:19:05 +0100 Subject: [PATCH 3/7] Adjust backreferences within tokenizer rules --- lib/ace/tokenizer.js | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 627d3841..1fecc294 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -50,14 +50,25 @@ var Tokenizer = function(rules) { var mapping = this.matchMappings[key] = {}; for ( var i = 0; i < state.length; i++) { - var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a"); + // Count number of matching groups. 2 extra groups from the full match + // And the catch-all on the end (used to force a match); + var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2; + + // Get how long references can be, depending on how many capturing groups have been found so far. + var reflength = ("" + (matchTotal + matchcount.length - 2)).length; + + // Replace any backreferences and offset appropriately. + var adjustedregex = state[i].regex.replace(new RegExp("\\\\([0-9]{1," + reflength + "})", "g"), function (match, digit) { + return "\\" + (parseInt(digit, 10) + matchTotal + 1); + }); + mapping[matchTotal] = { rule: i, - len: matchcount.length - 2 + len: matchcount }; - matchTotal += matchcount.length - 2; + matchTotal += matchcount; - ruleRegExps.push(state[i].regex); + ruleRegExps.push(adjustedregex); } this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g"); @@ -87,7 +98,7 @@ var Tokenizer = function(rules) { var type = "text"; var value = [match[0]]; - for ( var i = 0; i < state.length; i++) { + for ( var i = 0; i < match.length-1; i++) { if (match[i + 1] !== undefined) { var rule = state[mapping[i].rule]; From 3a1e9398ca3c76b6c43a10a930f3825f54cc1ac6 Mon Sep 17 00:00:00 2001 From: c-spencer Date: Thu, 19 May 2011 12:45:43 +0100 Subject: [PATCH 4/7] Fix match iteration --- lib/ace/tokenizer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 1fecc294..cfee8199 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -98,7 +98,7 @@ var Tokenizer = function(rules) { var type = "text"; var value = [match[0]]; - for ( var i = 0; i < match.length-1; i++) { + for ( var i = 0; i < match.length-2; i++) { if (match[i + 1] !== undefined) { var rule = state[mapping[i].rule]; From 6b660009adc122a780550f933d8fa966c330bfaf Mon Sep 17 00:00:00 2001 From: c-spencer Date: Thu, 19 May 2011 13:07:42 +0100 Subject: [PATCH 5/7] Simplify backreference adjustment --- lib/ace/tokenizer.js | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index cfee8199..6d03388a 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -53,12 +53,9 @@ var Tokenizer = function(rules) { // Count number of matching groups. 2 extra groups from the full match // And the catch-all on the end (used to force a match); var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2; - - // Get how long references can be, depending on how many capturing groups have been found so far. - var reflength = ("" + (matchTotal + matchcount.length - 2)).length; - + // Replace any backreferences and offset appropriately. - var adjustedregex = state[i].regex.replace(new RegExp("\\\\([0-9]{1," + reflength + "})", "g"), function (match, digit) { + var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) { return "\\" + (parseInt(digit, 10) + matchTotal + 1); }); From 1036aaa0574ce7759faaef4aaf50b70ea8dc81a1 Mon Sep 17 00:00:00 2001 From: c-spencer Date: Thu, 19 May 2011 18:30:03 +0100 Subject: [PATCH 6/7] Apply matched value to tokenizer token functions. --- lib/ace/tokenizer.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 6d03388a..74234607 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -104,7 +104,7 @@ var Tokenizer = function(rules) { } if (typeof rule.token == "function") - type = rule.token(match[0]); + type = rule.token.apply(this, value); else type = rule.token; From 40eba3a40118c73e339c1ea48f449f0156eac426 Mon Sep 17 00:00:00 2001 From: c-spencer Date: Thu, 19 May 2011 19:59:50 +0100 Subject: [PATCH 7/7] Get css tests passing (wrong test start rule) and update jsdom library paths --- lib/ace/mode/css_tokenizer_test.js | 8 ++++---- lib/ace/test/mockdom.js | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/ace/mode/css_tokenizer_test.js b/lib/ace/mode/css_tokenizer_test.js index 40569e16..a216ec2e 100644 --- a/lib/ace/mode/css_tokenizer_test.js +++ b/lib/ace/mode/css_tokenizer_test.js @@ -51,21 +51,21 @@ module.exports = { "test: tokenize pixel number" : function() { var line = "-12px"; - var tokens = this.tokenizer.getLineTokens(line, "start").tokens; + var tokens = this.tokenizer.getLineTokens(line, "ruleset").tokens; assert.equal(1, tokens.length); assert.equal("constant.numeric", tokens[0].type); }, "test: tokenize hex3 color" : function() { - var tokens = this.tokenizer.getLineTokens("#abc", "start").tokens; + var tokens = this.tokenizer.getLineTokens("#abc", "ruleset").tokens; assert.equal(1, tokens.length); assert.equal("constant.numeric", tokens[0].type); }, "test: tokenize hex6 color" : function() { - var tokens = this.tokenizer.getLineTokens("#abc012", "start").tokens; + var tokens = this.tokenizer.getLineTokens("#abc012", "ruleset").tokens; assert.equal(1, tokens.length); assert.equal("constant.numeric", tokens[0].type); @@ -81,7 +81,7 @@ module.exports = { }, "test for last rule in ruleset to catch capturing group bugs" : function() { - var tokens = this.tokenizer.getLineTokens("top", "start").tokens; + var tokens = this.tokenizer.getLineTokens("top", "ruleset").tokens; assert.equal(1, tokens.length); assert.equal("support.type", tokens[0].type); diff --git a/lib/ace/test/mockdom.js b/lib/ace/test/mockdom.js index 1c9b8476..4bf618a9 100644 --- a/lib/ace/test/mockdom.js +++ b/lib/ace/test/mockdom.js @@ -1,5 +1,5 @@ -var dom = require('jsdom/level2/html').dom.level2.html; -var browser = require('jsdom/browser/index').windowAugmentation(dom); +var dom = require('jsdom/lib/jsdom/level2/html').dom.level2.html; +var browser = require('jsdom/lib/jsdom/browser/index').windowAugmentation(dom); global.document = browser.document; global.window = browser.window;