From 41438b75b77df2e9ed6588bf3f5bf1bd4537c798 Mon Sep 17 00:00:00 2001 From: nightwing Date: Wed, 27 Mar 2013 14:05:45 +0400 Subject: [PATCH] early error on tokenLength/matchCount mismatch --- lib/ace/mode/logiql_highlight_rules.js | 2 +- lib/ace/tokenizer.js | 11 ++- lib/ace/tokenizer_dev.js | 107 ++----------------------- 3 files changed, 13 insertions(+), 107 deletions(-) diff --git a/lib/ace/mode/logiql_highlight_rules.js b/lib/ace/mode/logiql_highlight_rules.js index 66fa642c..17ceab0c 100644 --- a/lib/ace/mode/logiql_highlight_rules.js +++ b/lib/ace/mode/logiql_highlight_rules.js @@ -98,7 +98,7 @@ var LogiQLHighlightRules = function() { //All the lang system predicates }, { token: [ 'storage.type', 'text' ], - regex: '(export|sealed|clauses|block|alias)\\s*\\((?=`)', + regex: '(export|sealed|clauses|block|alias)(\\s*\\()(?=`)', //Module keywords }, { token: 'entity.name', diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index 33a50936..189b671b 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -75,8 +75,12 @@ var Tokenizer = function(rules) { var adjustedregex = rule.regex; var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2; if (Array.isArray(rule.token)) { - if (rule.token.length == 1) { + if (rule.token.length == 1 || matchcount == 1) { rule.token = rule.token[0]; + } else if (matchcount - 1 != rule.token.length) { + throw new Error("number of classes and regexp groups in '" + + rule.token + "'\n'" + rule.regex + "' doesn't match\n" + + (matchcount - 1) + "!=" + rule.token.length); } else { rule.tokenArray = rule.token; rule.onMatch = this.$arrayTokens; @@ -143,11 +147,6 @@ var Tokenizer = function(rules) { var values = this.splitRegex.exec(str); var tokens = []; var types = this.tokenArray; - if (types.length != values.length - 1) { - if (window.console) - console.error(types , values, str, this.splitRegex, this); - return [{type: "error.invalid", value: str}]; - } for (var i = 0, l = types.length; i < l; i++) { if (values[i + 1]) tokens[tokens.length] = { diff --git a/lib/ace/tokenizer_dev.js b/lib/ace/tokenizer_dev.js index e8a4dd84..4321035c 100644 --- a/lib/ace/tokenizer_dev.js +++ b/lib/ace/tokenizer_dev.js @@ -29,6 +29,7 @@ * ***** END LICENSE BLOCK ***** */ define(function(require, exports, module) { +var BaseTokenizer = require("./tokenizer").Tokenizer; // tokenizing lines longer than this makes editor very slow var MAX_TOKEN_COUNT = 1000; @@ -39,101 +40,7 @@ var MAX_TOKEN_COUNT = 1000; **/ var Tokenizer = function(rules) { - this.states = rules; - - this.regExps = {}; - this.matchMappings = {}; - for (var key in this.states) { - var state = this.states[key]; - var ruleRegExps = []; - var matchTotal = 0; - var mapping = this.matchMappings[key] = {defaultToken: "default.text"}; - var flag = "g"; - - for (var i = 0; i < state.length; i++) { - var rule = state[i]; - if (rule.defaultToken) - mapping.defaultToken = rule.defaultToken; - if (rule.caseInsensitive) - flag = "gi"; - if (rule.regex == null) - continue; - - if (rule.regex instanceof RegExp) - rule.regex = rule.regex.toString().slice(1, -1); - - // Count number of matching groups. 2 extra groups from the full match - // And the catch-all on the end (used to force a match); - var adjustedregex = rule.regex; - var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2; - if (Array.isArray(rule.token)) { - if (rule.token.length == 1) { - rule.token = rule.token[0]; - } else { - rule.tokenArray = rule.token; - rule.token = this.$arrayTokens; - } - } - - if (matchcount > 1) { - if (/\\\d/.test(rule.regex)) { - // Replace any backreferences and offset appropriately. - adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function (match, digit) { - return "\\" + (parseInt(digit, 10) + matchTotal + 1); - }); - } else { - matchcount = 1; - adjustedregex = this.removeCapturingGroups(rule.regex); - } - if (!rule.splitRegex) - rule.splitRegex = this.createSplitterRegexp(rule.regex, flag); - } - - mapping[matchTotal] = i; - matchTotal += matchcount; - - ruleRegExps.push(adjustedregex); - } - - this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag); - } -}; - -(function() { - this.$arrayTokens = function(str) { - if (!str) - return []; - var values = str.split(this.splitRegex) - var tokens = []; - var types = this.tokenArray; - if (types.length != values.length - 2) { - if (window.console) - console.error(types.length , values.length - 2, str, this.splitRegex); - return [{type: "error.invalid", value: str}]; - } - for (var i = 0; i < types.length; i++) { - if (values[i + 1]) { - tokens[tokens.length] = { - type: types[i], - value: values[i + 1] - }; - } - } - return tokens; - }; - - this.removeCapturingGroups = function(src) { - var r = src.replace( - /\[(?:\\.|[^\]])*?\]|\\.|\(\?[:=!]|(\()/g, - function(x, y) {return y ? "(?:" : x;} - ); - return r; - }; - - this.createSplitterRegexp = function(src, flag) { - src = src.replace(/\(\?=([^()]|\\.)*?\)$/, ""); - return new RegExp(src, flag); - }; + BaseTokenizer.call(this, rules); /** * Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state. @@ -202,10 +109,10 @@ var Tokenizer = function(rules) { rule = state[mapping[i]]; - // compute token type - type = typeof rule.token == "function" - ? rule.token(value, currentState, stack) - : rule.token; + if (rule.onMatch) + type = rule.onMatch(value, currentState, stack); + else + type = rule.token; if (rule.next) { if (typeof rule.next == "string") @@ -268,7 +175,7 @@ var Tokenizer = function(rules) { }; }; -}).call(Tokenizer.prototype); +}; exports.Tokenizer = Tokenizer; });