Adjust backreferences within tokenizer rules

This commit is contained in:
c-spencer 2011-05-19 12:19:05 +01:00
commit c2659f24fe

View file

@@ -50,14 +50,25 @@ var Tokenizer = function(rules) {
var mapping = this.matchMappings[key] = {};
for ( var i = 0; i < state.length; i++) {
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a");
// Count number of matching groups. 2 extra groups from the full match
// And the catch-all on the end (used to force a match);
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2;
// Get how long references can be, depending on how many capturing groups have been found so far.
var reflength = ("" + (matchTotal + matchcount.length - 2)).length;
// Replace any backreferences and offset appropriately.
var adjustedregex = state[i].regex.replace(new RegExp("\\\\([0-9]{1," + reflength + "})", "g"), function (match, digit) {
return "\\" + (parseInt(digit, 10) + matchTotal + 1);
});
mapping[matchTotal] = {
rule: i,
len: matchcount.length - 2
len: matchcount
};
matchTotal += matchcount.length - 2;
matchTotal += matchcount;
ruleRegExps.push(state[i].regex);
ruleRegExps.push(adjustedregex);
}
this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g");
@@ -87,7 +98,7 @@ var Tokenizer = function(rules) {
var type = "text";
var value = [match[0]];
for ( var i = 0; i < state.length; i++) {
for ( var i = 0; i < match.length-1; i++) {
if (match[i + 1] !== undefined) {
var rule = state[mapping[i].rule];