early error on tokenLength/matchCount mismatch

This commit is contained in:
nightwing 2013-03-27 14:05:45 +04:00
commit 41438b75b7
3 changed files with 13 additions and 107 deletions

View file

@ -98,7 +98,7 @@ var LogiQLHighlightRules = function() {
//All the lang system predicates
},
{ token: [ 'storage.type', 'text' ],
regex: '(export|sealed|clauses|block|alias)\\s*\\((?=`)',
regex: '(export|sealed|clauses|block|alias)(\\s*\\()(?=`)',
//Module keywords
},
{ token: 'entity.name',

View file

@ -75,8 +75,12 @@ var Tokenizer = function(rules) {
var adjustedregex = rule.regex;
var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2;
if (Array.isArray(rule.token)) {
if (rule.token.length == 1) {
if (rule.token.length == 1 || matchcount == 1) {
rule.token = rule.token[0];
} else if (matchcount - 1 != rule.token.length) {
throw new Error("number of classes and regexp groups in '" +
rule.token + "'\n'" + rule.regex + "' doesn't match\n"
+ (matchcount - 1) + "!=" + rule.token.length);
} else {
rule.tokenArray = rule.token;
rule.onMatch = this.$arrayTokens;
@ -143,11 +147,6 @@ var Tokenizer = function(rules) {
var values = this.splitRegex.exec(str);
var tokens = [];
var types = this.tokenArray;
if (types.length != values.length - 1) {
if (window.console)
console.error(types , values, str, this.splitRegex, this);
return [{type: "error.invalid", value: str}];
}
for (var i = 0, l = types.length; i < l; i++) {
if (values[i + 1])
tokens[tokens.length] = {

View file

@ -29,6 +29,7 @@
* ***** END LICENSE BLOCK ***** */
define(function(require, exports, module) {
var BaseTokenizer = require("./tokenizer").Tokenizer;
// Tokenizing lines longer than this makes the editor very slow.
var MAX_TOKEN_COUNT = 1000;
@ -39,101 +40,7 @@ var MAX_TOKEN_COUNT = 1000;
**/
var Tokenizer = function(rules) {
    // Compiles the highlight rules into one combined RegExp per state.
    //
    // `rules` maps a state name to an array of rule objects. For each state:
    //   * this.regExps[state]       - a single RegExp whose alternatives are the
    //                                 rules' regex sources, plus a trailing `($)`.
    //   * this.matchMappings[state] - maps the capture-group offset at which a
    //                                 rule starts to that rule's index in the
    //                                 state array, plus a `defaultToken` entry.
    // Rule objects are modified in place (regex is normalised to a string,
    // array tokens are unpacked, splitRegex is filled in).
    this.states = rules;
    this.regExps = {};
    this.matchMappings = {};

    for (var key in this.states) {
        var state = this.states[key];
        var ruleRegExps = [];
        var matchTotal = 0;
        var mapping = this.matchMappings[key] = {defaultToken: "default.text"};
        // Once any rule in the state asks for case insensitivity, the flag
        // stays "gi" for the remaining rules and for the combined RegExp.
        var flag = "g";

        for (var i = 0; i < state.length; i++) {
            var rule = state[i];

            if (rule.defaultToken)
                mapping.defaultToken = rule.defaultToken;
            if (rule.caseInsensitive)
                flag = "gi";
            // Rules without a regex only carry settings such as defaultToken.
            if (rule.regex == null)
                continue;

            // Normalise RegExp objects to their pattern text. `source` is used
            // instead of toString().slice(1, -1) because the latter leaves a
            // stray "/" (and flag characters) in the pattern when the RegExp
            // has flags, e.g. /foo/i -> "foo/". Flags on the RegExp object are
            // dropped; case insensitivity is requested via `caseInsensitive`.
            if (rule.regex instanceof RegExp)
                rule.regex = rule.regex.source;

            // Count the capturing groups of the rule. The probe pattern wraps
            // the rule in one group and appends a catch-all `(.)` so that
            // exec("a") always matches; subtracting 2 discards the full-match
            // entry and the catch-all, leaving 1 + (groups inside the rule).
            var adjustedregex = rule.regex;
            var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2;

            if (Array.isArray(rule.token)) {
                if (rule.token.length == 1) {
                    rule.token = rule.token[0];
                } else {
                    // Multiple token types: keep the list on the rule and let
                    // $arrayTokens split the matched text group by group.
                    rule.tokenArray = rule.token;
                    rule.token = this.$arrayTokens;
                }
            }

            if (matchcount > 1) {
                if (/\\\d/.test(rule.regex)) {
                    // The rule uses backreferences, so its groups must stay.
                    // Shift every \N by the number of groups that precede this
                    // rule in the combined RegExp (+1 for the wrapping group).
                    adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function (match, digit) {
                        return "\\" + (parseInt(digit, 10) + matchTotal + 1);
                    });
                } else {
                    // No backreferences: turn the inner groups into
                    // non-capturing ones so the rule occupies a single group.
                    matchcount = 1;
                    adjustedregex = this.removeCapturingGroups(rule.regex);
                }
                // The original (capturing) pattern is kept separately for
                // splitting a matched value into its groups.
                if (!rule.splitRegex)
                    rule.splitRegex = this.createSplitterRegexp(rule.regex, flag);
            }

            mapping[matchTotal] = i;
            matchTotal += matchcount;
            ruleRegExps.push(adjustedregex);
        }

        // The trailing `($)` alternative lets the combined RegExp match at the
        // end of input even when no rule does.
        this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag);
    }
};
(function() {
// Splits a matched string into one token per capturing group.
//
// Installed as `rule.token` for rules whose token is an array, so `this` is
// the rule object: `this.splitRegex` is the rule's capturing pattern and
// `this.tokenArray` the list of token types, one per group.
//
// str: the text matched by the rule.
// Returns an array of {type, value} objects; groups that matched nothing are
// omitted. If the number of groups does not equal the number of token types,
// a single "error.invalid" token covering the whole string is returned.
this.$arrayTokens = function(str) {
    if (!str)
        return [];

    // With capturing groups in the separator, split() interleaves the captured
    // groups with the text around the match; the code expects one leading and
    // one trailing piece, hence the "- 2" and the "i + 1" below.
    var values = str.split(this.splitRegex);
    var tokens = [];
    var types = this.tokenArray;

    if (types.length != values.length - 2) {
        // `console` is probed with typeof so that environments without a
        // `window` global (workers, Node) still reach the fallback return
        // instead of throwing a ReferenceError.
        if (typeof console !== "undefined")
            console.error(types.length , values.length - 2, str, this.splitRegex);
        return [{type: "error.invalid", value: str}];
    }

    for (var i = 0; i < types.length; i++) {
        if (values[i + 1]) {
            tokens[tokens.length] = {
                type: types[i],
                value: values[i + 1]
            };
        }
    }
    return tokens;
};
// Rewrites a regex source so that every plain capturing group "(" becomes a
// non-capturing "(?:". Character classes, escaped characters and groups that
// already start with "(?:", "(?=" or "(?!" are copied through unchanged.
//
// src: regex source string.
// Returns the rewritten source string.
this.removeCapturingGroups = function(src) {
    // Only the last alternative captures, so its presence identifies a bare "(".
    var groupOpener = /\[(?:\\.|[^\]])*?\]|\\.|\(\?[:=!]|(\()/g;
    return src.replace(groupOpener, function(matched, bareParen) {
        if (bareParen)
            return "(?:";
        return matched;
    });
};
// Builds the RegExp used to split a matched value into its groups.
// A lookahead group "(?=...)" at the very end of the source is stripped first.
//
// src:  regex source string.
// flag: flags passed to the RegExp constructor.
// Returns the compiled RegExp.
this.createSplitterRegexp = function(src, flag) {
    var trailingLookahead = /\(\?=([^()]|\\.)*?\)$/;
    var withoutLookahead = src.replace(trailingLookahead, "");
    return new RegExp(withoutLookahead, flag);
};
BaseTokenizer.call(this, rules);
/**
* Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state.
@ -202,10 +109,10 @@ var Tokenizer = function(rules) {
rule = state[mapping[i]];
// compute token type
type = typeof rule.token == "function"
? rule.token(value, currentState, stack)
: rule.token;
if (rule.onMatch)
type = rule.onMatch(value, currentState, stack);
else
type = rule.token;
if (rule.next) {
if (typeof rule.next == "string")
@ -268,7 +175,7 @@ var Tokenizer = function(rules) {
};
};
}).call(Tokenizer.prototype);
};
exports.Tokenizer = Tokenizer;
});