early error on tokenLength/matchCount mismatch
This commit is contained in:
parent
7f76f0b81c
commit
41438b75b7
3 changed files with 13 additions and 107 deletions
|
|
@ -98,7 +98,7 @@ var LogiQLHighlightRules = function() {
|
|||
//All the lang system predicates
|
||||
},
|
||||
{ token: [ 'storage.type', 'text' ],
|
||||
regex: '(export|sealed|clauses|block|alias)\\s*\\((?=`)',
|
||||
regex: '(export|sealed|clauses|block|alias)(\\s*\\()(?=`)',
|
||||
//Module keywords
|
||||
},
|
||||
{ token: 'entity.name',
|
||||
|
|
|
|||
|
|
@ -75,8 +75,12 @@ var Tokenizer = function(rules) {
|
|||
var adjustedregex = rule.regex;
|
||||
var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2;
|
||||
if (Array.isArray(rule.token)) {
|
||||
if (rule.token.length == 1) {
|
||||
if (rule.token.length == 1 || matchcount == 1) {
|
||||
rule.token = rule.token[0];
|
||||
} else if (matchcount - 1 != rule.token.length) {
|
||||
throw new Error("number of classes and regexp groups in '" +
|
||||
rule.token + "'\n'" + rule.regex + "' doesn't match\n"
|
||||
+ (matchcount - 1) + "!=" + rule.token.length);
|
||||
} else {
|
||||
rule.tokenArray = rule.token;
|
||||
rule.onMatch = this.$arrayTokens;
|
||||
|
|
@ -143,11 +147,6 @@ var Tokenizer = function(rules) {
|
|||
var values = this.splitRegex.exec(str);
|
||||
var tokens = [];
|
||||
var types = this.tokenArray;
|
||||
if (types.length != values.length - 1) {
|
||||
if (window.console)
|
||||
console.error(types , values, str, this.splitRegex, this);
|
||||
return [{type: "error.invalid", value: str}];
|
||||
}
|
||||
for (var i = 0, l = types.length; i < l; i++) {
|
||||
if (values[i + 1])
|
||||
tokens[tokens.length] = {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
define(function(require, exports, module) {
|
||||
var BaseTokenizer = require("./tokenizer").Tokenizer;
|
||||
|
||||
// tokenizing lines longer than this makes editor very slow
|
||||
var MAX_TOKEN_COUNT = 1000;
|
||||
|
|
@ -39,101 +40,7 @@ var MAX_TOKEN_COUNT = 1000;
|
|||
**/
|
||||
|
||||
var Tokenizer = function(rules) {
|
||||
this.states = rules;
|
||||
|
||||
this.regExps = {};
|
||||
this.matchMappings = {};
|
||||
for (var key in this.states) {
|
||||
var state = this.states[key];
|
||||
var ruleRegExps = [];
|
||||
var matchTotal = 0;
|
||||
var mapping = this.matchMappings[key] = {defaultToken: "default.text"};
|
||||
var flag = "g";
|
||||
|
||||
for (var i = 0; i < state.length; i++) {
|
||||
var rule = state[i];
|
||||
if (rule.defaultToken)
|
||||
mapping.defaultToken = rule.defaultToken;
|
||||
if (rule.caseInsensitive)
|
||||
flag = "gi";
|
||||
if (rule.regex == null)
|
||||
continue;
|
||||
|
||||
if (rule.regex instanceof RegExp)
|
||||
rule.regex = rule.regex.toString().slice(1, -1);
|
||||
|
||||
// Count number of matching groups. 2 extra groups from the full match
|
||||
// And the catch-all on the end (used to force a match);
|
||||
var adjustedregex = rule.regex;
|
||||
var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2;
|
||||
if (Array.isArray(rule.token)) {
|
||||
if (rule.token.length == 1) {
|
||||
rule.token = rule.token[0];
|
||||
} else {
|
||||
rule.tokenArray = rule.token;
|
||||
rule.token = this.$arrayTokens;
|
||||
}
|
||||
}
|
||||
|
||||
if (matchcount > 1) {
|
||||
if (/\\\d/.test(rule.regex)) {
|
||||
// Replace any backreferences and offset appropriately.
|
||||
adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function (match, digit) {
|
||||
return "\\" + (parseInt(digit, 10) + matchTotal + 1);
|
||||
});
|
||||
} else {
|
||||
matchcount = 1;
|
||||
adjustedregex = this.removeCapturingGroups(rule.regex);
|
||||
}
|
||||
if (!rule.splitRegex)
|
||||
rule.splitRegex = this.createSplitterRegexp(rule.regex, flag);
|
||||
}
|
||||
|
||||
mapping[matchTotal] = i;
|
||||
matchTotal += matchcount;
|
||||
|
||||
ruleRegExps.push(adjustedregex);
|
||||
}
|
||||
|
||||
this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag);
|
||||
}
|
||||
};
|
||||
|
||||
(function() {
|
||||
this.$arrayTokens = function(str) {
    // Nothing matched: no tokens to emit.
    if (!str)
        return [];

    // Splitting the matched text by its own (capturing) regex yields the
    // captured pieces between two empty outer splits.
    var parts = str.split(this.splitRegex);
    var result = [];
    var typeList = this.tokenArray;

    if (typeList.length != parts.length - 2) {
        // Mismatch between declared token classes and captured groups:
        // report it and fall back to a single error token.
        // NOTE(review): `window` is assumed to exist (browser context) — this
        // guard would throw in a non-browser environment; confirm intent.
        if (window.console)
            console.error(typeList.length , parts.length - 2, str, this.splitRegex);
        return [{type: "error.invalid", value: str}];
    }

    var i = 0;
    while (i < typeList.length) {
        // Skip alternatives that did not participate in the match.
        if (parts[i + 1]) {
            result.push({
                type: typeList[i],
                value: parts[i + 1]
            });
        }
        i++;
    }
    return result;
};
|
||||
|
||||
this.removeCapturingGroups = function(src) {
    // Rewrite every bare "(" into a non-capturing "(?:", while leaving
    // character classes, escape sequences, and existing (?: (?= (?! prefixes
    // untouched — only the final alternative's group captures the "(".
    return src.replace(
        /\[(?:\\.|[^\]])*?\]|\\.|\(\?[:=!]|(\()/g,
        function(whole, openParen) {
            return openParen ? "(?:" : whole;
        }
    );
};
|
||||
|
||||
this.createSplitterRegexp = function(src, flag) {
    // Drop a trailing lookahead so the splitter consumes exactly the text the
    // combined tokenizer regexp matched (lookaheads match zero width there).
    var trimmed = src.replace(/\(\?=([^()]|\\.)*?\)$/, "");
    return new RegExp(trimmed, flag);
};
|
||||
BaseTokenizer.call(this, rules);
|
||||
|
||||
/**
|
||||
* Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state.
|
||||
|
|
@ -202,10 +109,10 @@ var Tokenizer = function(rules) {
|
|||
|
||||
rule = state[mapping[i]];
|
||||
|
||||
// compute token type
|
||||
type = typeof rule.token == "function"
|
||||
? rule.token(value, currentState, stack)
|
||||
: rule.token;
|
||||
if (rule.onMatch)
|
||||
type = rule.onMatch(value, currentState, stack);
|
||||
else
|
||||
type = rule.token;
|
||||
|
||||
if (rule.next) {
|
||||
if (typeof rule.next == "string")
|
||||
|
|
@ -268,7 +175,7 @@ var Tokenizer = function(rules) {
|
|||
};
|
||||
};
|
||||
|
||||
}).call(Tokenizer.prototype);
|
||||
};
|
||||
|
||||
exports.Tokenizer = Tokenizer;
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue