ace/lib/ace/tokenizer_dev.js

274 lines
9.4 KiB
JavaScript

/* ***** BEGIN LICENSE BLOCK *****
* Distributed under the BSD license:
*
* Copyright (c) 2010, Ajax.org B.V.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Ajax.org B.V. nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ***** END LICENSE BLOCK ***** */
define(function(require, exports, module) {
// tokenizing lines longer than this makes editor very slow
var MAX_TOKEN_COUNT = 1000;
/*
* version of Tokenizer with additional logging
* and infinite loop checks
* can be used for developing/testing new modes
**/
var Tokenizer = function(rules) {
this.states = rules;
this.regExps = {};
this.matchMappings = {};
for (var key in this.states) {
var state = this.states[key];
var ruleRegExps = [];
var matchTotal = 0;
var mapping = this.matchMappings[key] = {defaultToken: "default.text"};
var flag = "g";
for (var i = 0; i < state.length; i++) {
var rule = state[i];
if (rule.defaultToken)
mapping.defaultToken = rule.defaultToken;
if (rule.caseInsensitive)
flag = "gi";
if (rule.regex == null)
continue;
if (rule.regex instanceof RegExp)
rule.regex = rule.regex.toString().slice(1, -1);
// Count number of matching groups. 2 extra groups from the full match
// And the catch-all on the end (used to force a match);
var adjustedregex = rule.regex;
var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2;
if (Array.isArray(rule.token)) {
if (rule.token.length == 1) {
rule.token = rule.token[0];
} else {
rule.tokenArray = rule.token;
rule.token = this.$arrayTokens;
}
}
if (matchcount > 1) {
if (/\\\d/.test(rule.regex)) {
// Replace any backreferences and offset appropriately.
adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function (match, digit) {
return "\\" + (parseInt(digit, 10) + matchTotal + 1);
});
} else {
matchcount = 1;
adjustedregex = this.removeCapturingGroups(rule.regex);
}
if (!rule.splitRegex)
rule.splitRegex = this.createSplitterRegexp(rule.regex, flag);
}
mapping[matchTotal] = i;
matchTotal += matchcount;
ruleRegExps.push(adjustedregex);
}
this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag);
}
};
(function() {
this.$arrayTokens = function(str) {
if (!str)
return [];
var values = str.split(this.splitRegex)
var tokens = [];
var types = this.tokenArray;
if (types.length != values.length - 2) {
if (window.console)
console.error(types.length , values.length - 2, str, this.splitRegex);
return [{type: "error.invalid", value: str}];
}
for (var i = 0; i < types.length; i++) {
if (values[i + 1]) {
tokens[tokens.length] = {
type: types[i],
value: values[i + 1]
};
}
}
return tokens;
};
this.removeCapturingGroups = function(src) {
var r = src.replace(
/\[(?:\\.|[^\]])*?\]|\\.|\(\?[:=!]|(\()/g,
function(x, y) {return y ? "(?:" : x;}
);
return r;
};
this.createSplitterRegexp = function(src, flag) {
src = src.replace(/\(\?=([^()]|\\.)*?\)$/, "");
return new RegExp(src, flag);
};
/**
* Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state.
* @returns {Object}
**/
this.getLineTokens = function(line, startState) {
if (startState && typeof startState != "string") {
var stack = startState.slice(0);
startState = stack[0];
} else
var stack = [];
var currentState = startState || "start";
var state = this.states[currentState];
var mapping = this.matchMappings[currentState];
var re = this.regExps[currentState];
re.lastIndex = 0;
var match, tokens = [];
var lastIndex = 0;
var stateTransitions = [];
function onStateChange() {
stateTransitions.push(startState+"@"+lastIndex);
}
function initState() {
onStateChange();
stateTransitions = [];
onStateChange();
}
var token = {
type: null,
value: "",
state: currentState
};
initState();
var maxRecur = 10000;
while (match = re.exec(line)) {
var type = mapping.defaultToken;
var rule = null;
var value = match[0];
var index = re.lastIndex;
if (index - value.length > lastIndex) {
var skipped = line.substring(lastIndex, index - value.length);
if (token.type == type) {
token.value += skipped;
} else {
if (token.type)
tokens.push(token);
token = {type: type, value: skipped};
}
}
for (var i = 0; i < match.length-2; i++) {
if (match[i + 1] === undefined)
continue;
if (!maxRecur--) {
throw "infinite" + state[mapping[i]] + currentState
}
rule = state[mapping[i]];
// compute token type
type = typeof rule.token == "function"
? rule.token(value, currentState, stack)
: rule.token;
if (rule.next) {
if (typeof rule.next == "string")
currentState = rule.next;
else
currentState = rule.next(currentState, stack);
state = this.states[currentState];
if (!state) {
window.console && console.error && console.error(currentState, "doesn't exist");
currentState = "start";
state = this.states[currentState];
}
mapping = this.matchMappings[currentState];
lastIndex = index;
re = this.regExps[currentState];
re.lastIndex = index;
onStateChange();
}
break;
}
if (value) {
if (typeof type == "string") {
if ((!rule || rule.merge !== false) && token.type === type) {
token.value += value;
} else {
if (token.type)
tokens.push(token);
token = {type: type, value: value};
}
} else {
if (token.type)
tokens.push(token);
token = {type: null, value: ""};
for (var i = 0; i < type.length; i++)
tokens.push(type[i]);
}
}
if (lastIndex == line.length)
break;
lastIndex = index;
if (tokens.length > MAX_TOKEN_COUNT) {
token.value += line.substr(lastIndex);
currentState = "start"
break;
}
}
if (token.type)
tokens.push(token);
return {
tokens : tokens,
state : stack.length ? stack : currentState
};
};
}).call(Tokenizer.prototype);
exports.Tokenizer = Tokenizer;
});