Merge pull request #253 from fivesixty/master
Add matching group and back reference support to tokenizer rules.
This commit is contained in:
commit
993f2a17ee
3 changed files with 61 additions and 27 deletions
|
|
@ -51,21 +51,21 @@ module.exports = {
|
|||
|
||||
"test: tokenize pixel number" : function() {
|
||||
var line = "-12px";
|
||||
var tokens = this.tokenizer.getLineTokens(line, "start").tokens;
|
||||
var tokens = this.tokenizer.getLineTokens(line, "ruleset").tokens;
|
||||
|
||||
assert.equal(1, tokens.length);
|
||||
assert.equal("constant.numeric", tokens[0].type);
|
||||
},
|
||||
|
||||
"test: tokenize hex3 color" : function() {
|
||||
var tokens = this.tokenizer.getLineTokens("#abc", "start").tokens;
|
||||
var tokens = this.tokenizer.getLineTokens("#abc", "ruleset").tokens;
|
||||
|
||||
assert.equal(1, tokens.length);
|
||||
assert.equal("constant.numeric", tokens[0].type);
|
||||
},
|
||||
|
||||
"test: tokenize hex6 color" : function() {
|
||||
var tokens = this.tokenizer.getLineTokens("#abc012", "start").tokens;
|
||||
var tokens = this.tokenizer.getLineTokens("#abc012", "ruleset").tokens;
|
||||
|
||||
assert.equal(1, tokens.length);
|
||||
assert.equal("constant.numeric", tokens[0].type);
|
||||
|
|
@ -81,7 +81,7 @@ module.exports = {
|
|||
},
|
||||
|
||||
"test for last rule in ruleset to catch capturing group bugs" : function() {
|
||||
var tokens = this.tokenizer.getLineTokens("top", "start").tokens;
|
||||
var tokens = this.tokenizer.getLineTokens("top", "ruleset").tokens;
|
||||
|
||||
assert.equal(1, tokens.length);
|
||||
assert.equal("support.type", tokens[0].type);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
var dom = require('jsdom/level2/html').dom.level2.html;
|
||||
var browser = require('jsdom/browser/index').windowAugmentation(dom);
|
||||
var dom = require('jsdom/lib/jsdom/level2/html').dom.level2.html;
|
||||
var browser = require('jsdom/lib/jsdom/browser/index').windowAugmentation(dom);
|
||||
|
||||
global.document = browser.document;
|
||||
global.window = browser.window;
|
||||
|
|
|
|||
|
|
@ -41,13 +41,32 @@ var Tokenizer = function(rules) {
|
|||
this.rules = rules;
|
||||
|
||||
this.regExps = {};
|
||||
this.matchMappings = {};
|
||||
for ( var key in this.rules) {
|
||||
var rule = this.rules[key];
|
||||
var state = rule;
|
||||
var ruleRegExps = [];
|
||||
|
||||
for ( var i = 0; i < state.length; i++)
|
||||
ruleRegExps.push(state[i].regex);
|
||||
var matchTotal = 0;
|
||||
var mapping = this.matchMappings[key] = {};
|
||||
|
||||
for ( var i = 0; i < state.length; i++) {
|
||||
// Count the number of matching groups. The 2 extra entries subtracted are the full match
|
||||
// and the catch-all group on the end (used to force a match).
|
||||
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2;
|
||||
|
||||
// Replace any backreferences and offset appropriately.
|
||||
var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) {
|
||||
return "\\" + (parseInt(digit, 10) + matchTotal + 1);
|
||||
});
|
||||
|
||||
mapping[matchTotal] = {
|
||||
rule: i,
|
||||
len: matchcount
|
||||
};
|
||||
matchTotal += matchcount;
|
||||
|
||||
ruleRegExps.push(adjustedregex);
|
||||
}
|
||||
|
||||
this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g");
|
||||
|
||||
|
|
@ -59,34 +78,40 @@ var Tokenizer = function(rules) {
|
|||
this.getLineTokens = function(line, startState) {
|
||||
var currentState = startState;
|
||||
var state = this.rules[currentState];
|
||||
var mapping = this.matchMappings[currentState];
|
||||
var re = this.regExps[currentState];
|
||||
re.lastIndex = 0;
|
||||
|
||||
|
||||
var match, tokens = [];
|
||||
|
||||
|
||||
var lastIndex = 0;
|
||||
|
||||
|
||||
var token = {
|
||||
type: null,
|
||||
value: ""
|
||||
};
|
||||
|
||||
|
||||
while (match = re.exec(line)) {
|
||||
var type = "text";
|
||||
var value = match[0];
|
||||
var value = [match[0]];
|
||||
|
||||
for ( var i = 0; i < state.length; i++) {
|
||||
for ( var i = 0; i < match.length-2; i++) {
|
||||
if (match[i + 1] !== undefined) {
|
||||
var rule = state[i];
|
||||
var rule = state[mapping[i].rule];
|
||||
|
||||
if (mapping[i].len > 1) {
|
||||
value = match.slice(i+2, i+1+mapping[i].len);
|
||||
}
|
||||
|
||||
if (typeof rule.token == "function")
|
||||
type = rule.token(match[0]);
|
||||
type = rule.token.apply(this, value);
|
||||
else
|
||||
type = rule.token;
|
||||
|
||||
if (rule.next && rule.next !== currentState) {
|
||||
currentState = rule.next;
|
||||
state = this.rules[currentState];
|
||||
mapping = this.matchMappings[currentState];
|
||||
lastIndex = re.lastIndex;
|
||||
|
||||
re = this.regExps[currentState];
|
||||
|
|
@ -96,17 +121,26 @@ var Tokenizer = function(rules) {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
if (token.type !== type) {
|
||||
if (token.type)
|
||||
tokens.push(token);
|
||||
if (typeof type == "string") {
|
||||
if (typeof value != "string") {
|
||||
value = [value.join("")];
|
||||
}
|
||||
type = [type];
|
||||
}
|
||||
|
||||
for ( var i = 0; i < value.length; i++) {
|
||||
if (token.type !== type[i]) {
|
||||
if (token.type) {
|
||||
tokens.push(token);
|
||||
}
|
||||
|
||||
token = {
|
||||
type: type,
|
||||
value: value
|
||||
};
|
||||
} else {
|
||||
token.value += value;
|
||||
token = {
|
||||
type: type[i],
|
||||
value: value[i]
|
||||
}
|
||||
} else {
|
||||
token.value += value[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (lastIndex == line.length)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue