Support for matching groups in tokenizer with arrays of tokens.
This commit is contained in:
parent
d1e05468c6
commit
ae5e83ea02
1 changed file with 44 additions and 18 deletions
|
|
@@ -41,13 +41,24 @@ var Tokenizer = function(rules) {
|
|||
this.rules = rules;
|
||||
|
||||
this.regExps = {};
|
||||
this.matchMappings = {};
|
||||
for ( var key in this.rules) {
|
||||
var rule = this.rules[key];
|
||||
var state = rule;
|
||||
var ruleRegExps = [];
|
||||
|
||||
for ( var i = 0; i < state.length; i++)
|
||||
var matchTotal = 0;
|
||||
var mapping = this.matchMappings[key] = {};
|
||||
|
||||
for ( var i = 0; i < state.length; i++) {
|
||||
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a");
|
||||
mapping[matchTotal] = {
|
||||
rule: i,
|
||||
len: matchcount.length - 2
|
||||
};
|
||||
matchTotal += matchcount.length - 2;
|
||||
|
||||
ruleRegExps.push(state[i].regex);
|
||||
}
|
||||
|
||||
this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g");
|
||||
|
||||
|
|
@@ -59,25 +70,30 @@ var Tokenizer = function(rules) {
|
|||
this.getLineTokens = function(line, startState) {
|
||||
var currentState = startState;
|
||||
var state = this.rules[currentState];
|
||||
var mapping = this.matchMappings[currentState];
|
||||
var re = this.regExps[currentState];
|
||||
re.lastIndex = 0;
|
||||
|
||||
|
||||
var match, tokens = [];
|
||||
|
||||
|
||||
var lastIndex = 0;
|
||||
|
||||
|
||||
var token = {
|
||||
type: null,
|
||||
value: ""
|
||||
};
|
||||
|
||||
|
||||
while (match = re.exec(line)) {
|
||||
var type = "text";
|
||||
var value = match[0];
|
||||
var value = [match[0]];
|
||||
|
||||
for ( var i = 0; i < state.length; i++) {
|
||||
if (match[i + 1] !== undefined) {
|
||||
var rule = state[i];
|
||||
var rule = state[mapping[i].rule];
|
||||
|
||||
if (mapping[i].len > 1) {
|
||||
value = match.slice(i+2, i+1+mapping[i].len);
|
||||
}
|
||||
|
||||
if (typeof rule.token == "function")
|
||||
type = rule.token(match[0]);
|
||||
|
|
@@ -87,6 +103,7 @@ var Tokenizer = function(rules) {
|
|||
if (rule.next && rule.next !== currentState) {
|
||||
currentState = rule.next;
|
||||
state = this.rules[currentState];
|
||||
mapping = this.matchMappings[currentState];
|
||||
lastIndex = re.lastIndex;
|
||||
|
||||
re = this.regExps[currentState];
|
||||
|
|
@@ -96,17 +113,26 @@ var Tokenizer = function(rules) {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
if (token.type !== type) {
|
||||
if (token.type)
|
||||
tokens.push(token);
|
||||
if (typeof type == "string") {
|
||||
if (typeof value != "string") {
|
||||
value = [value.join("")];
|
||||
}
|
||||
type = [type];
|
||||
}
|
||||
|
||||
for ( var i = 0; i < value.length; i++) {
|
||||
if (token.type !== type[i]) {
|
||||
if (token.type) {
|
||||
tokens.push(token);
|
||||
}
|
||||
|
||||
token = {
|
||||
type: type,
|
||||
value: value
|
||||
};
|
||||
} else {
|
||||
token.value += value;
|
||||
token = {
|
||||
type: type[i],
|
||||
value: value[i]
|
||||
}
|
||||
} else {
|
||||
token.value += value;
|
||||
}
|
||||
}
|
||||
|
||||
if (lastIndex == line.length)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue