make createSplitterRegexp more robust

This commit is contained in:
nightwing 2013-03-06 20:02:17 +04:00
commit bc7d8a2ab2

View file

@ -93,7 +93,7 @@ var Tokenizer = function(rules) {
matchcount = 1;
adjustedregex = this.removeCapturingGroups(rule.regex);
}
if (!rule.splitRegex)
if (!rule.splitRegex && typeof rule.token != "string")
rule.splitRegex = this.createSplitterRegexp(rule.regex, flag);
}
@ -139,7 +139,34 @@ var Tokenizer = function(rules) {
};
/**
 * Builds a RegExp from a rule's regex source, with a trailing lookahead
 * group removed and the "g" flag dropped.
 *
 * The source is scanned once, tracking escapes, character classes and
 * parenthesis depth, to locate the last `(?=...)` / `(?!...)` group. That
 * group is cut out only when nothing but closing parentheses follows it,
 * i.e. when it really sits at the end of the pattern.
 *
 * NOTE(review): the result is stored as `rule.splitRegex` by the caller;
 * presumably it is used to split a matched string into its capture
 * groups - confirm against the tokenizer's use of `splitRegex`.
 *
 * @param {string} src  Regular expression source text.
 * @param {string} [flag]  RegExp flags; a "g" flag is removed.
 * @returns {RegExp} The compiled regexp without the trailing lookahead.
 */
this.createSplitterRegexp = function(src, flag) {
    // Only scan when a positive lookahead is present at all.
    if (src.indexOf("(?=") != -1) {
        var stack = 0;           // current parenthesis nesting depth
        var inChClass = false;   // true while inside [...]
        var lastCapture = {};    // {stack, start, end} of the last lookahead group

        // replace() is used purely as an iterator over the interesting
        // tokens; every match is returned unchanged.
        src.replace(/(\\.)|(\((?:\?[=!])?)|(\))|([\[\]])/g, function(
            m, esc, parenOpen, parenClose, square, index
        ) {
            if (inChClass) {
                // Inside a class only an unescaped "]" matters; an escaped
                // "\]" is consumed by the `esc` alternative and leaves
                // `square` undefined.
                inChClass = square != "]";
            } else if (square == "[") {
                // A stray "]" outside a class is a literal and must not
                // switch the scanner into class mode.
                inChClass = true;
            } else if (parenClose) {
                if (stack == lastCapture.stack) {
                    lastCapture.end = index + 1;
                    // Invalidate the depth so that a later group closing
                    // at the same depth cannot overwrite `end`.
                    lastCapture.stack = -1;
                }
                stack--;
            } else if (parenOpen) {
                stack++;
                // Length > 1 means "(?=" or "(?!", i.e. a lookahead opener.
                if (parenOpen.length != 1) {
                    lastCapture.stack = stack;
                    lastCapture.start = index;
                }
            }
            return m;
        });

        // Strip the lookahead only if it is followed by nothing except the
        // closing parentheses of enclosing groups.
        if (lastCapture.end != null && /^\)*$/.test(src.substr(lastCapture.end)))
            src = src.substring(0, lastCapture.start) + src.substr(lastCapture.end);
    }
    return new RegExp(src, (flag||"").replace("g", ""));
};