Adjust backreferences within tokenizer rules
This commit is contained in:
parent
0ebff3b5c9
commit
c2659f24fe
1 changed file with 16 additions and 5 deletions
|
|
@@ -50,14 +50,25 @@ var Tokenizer = function(rules) {
|
|||
var mapping = this.matchMappings[key] = {};
|
||||
|
||||
for ( var i = 0; i < state.length; i++) {
|
||||
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a");
|
||||
// Count number of matching groups. 2 extra groups from the full match
|
||||
// And the catch-all on the end (used to force a match);
|
||||
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2;
|
||||
|
||||
// Get how long references can be, depending on how many capturing groups have been found so far.
|
||||
var reflength = ("" + (matchTotal + matchcount.length - 2)).length;
|
||||
|
||||
// Replace any backreferences and offset appropriately.
|
||||
var adjustedregex = state[i].regex.replace(new RegExp("\\\\([0-9]{1," + reflength + "})", "g"), function (match, digit) {
|
||||
return "\\" + (parseInt(digit, 10) + matchTotal + 1);
|
||||
});
|
||||
|
||||
mapping[matchTotal] = {
|
||||
rule: i,
|
||||
len: matchcount.length - 2
|
||||
len: matchcount
|
||||
};
|
||||
matchTotal += matchcount.length - 2;
|
||||
matchTotal += matchcount;
|
||||
|
||||
ruleRegExps.push(state[i].regex);
|
||||
ruleRegExps.push(adjustedregex);
|
||||
}
|
||||
|
||||
this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g");
|
||||
|
|
@@ -87,7 +98,7 @@ var Tokenizer = function(rules) {
|
|||
var type = "text";
|
||||
var value = [match[0]];
|
||||
|
||||
for ( var i = 0; i < state.length; i++) {
|
||||
for ( var i = 0; i < match.length-1; i++) {
|
||||
if (match[i + 1] !== undefined) {
|
||||
var rule = state[mapping[i].rule];
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue