ace/lib/ace/tokenizer_dev.js
2012-09-30 02:59:57 +04:00

205 lines
7 KiB
JavaScript

/* ***** BEGIN LICENSE BLOCK *****
* Distributed under the BSD license:
*
* Copyright (c) 2010, Ajax.org B.V.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Ajax.org B.V. nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL AJAX.ORG B.V. BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ***** END LICENSE BLOCK ***** */
define(function(require, exports, module) {
/**
 * Version of Tokenizer with additional logging and infinite loop checks.
 * Can be used for developing/testing new modes.
 *
 * For every state in `rules` the constructor joins the patterns of that
 * state's rules into a single alternation of the form
 * `(?:(rule0)|(rule1)|...|(.))`. The trailing `(.)` is a catch-all used to
 * force a match when no rule applies. The compiled expressions are stored in
 * `this.regExps[stateName]`.
 *
 * `this.matchMappings[stateName]` is keyed by the zero-based offset of each
 * rule's outer capture group inside the combined expression and holds
 * `{rule: <index of the rule in the state>, len: <number of capture groups
 * owned by the rule, outer group included>}`.
 *
 * NOTE: a rule whose `regex` is a RegExp object is rewritten in place to the
 * pattern's source string; the flags of that RegExp are not kept.
 *
 * @param {Object} rules map of state name to an array of rule objects; the
 *     properties read here and in getLineTokens are `regex`, `token`, `next`
 *     and `merge`
 * @param {string} [flag] extra RegExp flags, appended to the mandatory "g"
 * @throws {Error} if a rule has more than one capture group and the length of
 *     its `token` differs from the number of inner groups
 **/
var Tokenizer = function(rules, flag) {
    flag = flag ? "g" + flag : "g";
    this.rules = rules;
    this.regExps = {};
    this.matchMappings = {};

    for (var key in this.rules) {
        var state = this.rules[key];
        var ruleRegExps = [];
        var matchTotal = 0;
        var mapping = this.matchMappings[key] = {};

        for (var i = 0; i < state.length; i++) {
            // Use .source rather than toString().slice(1, -1): the latter
            // leaves a stray "/" in the pattern when the RegExp has flags
            // (e.g. /ab/i would become "ab/").
            if (state[i].regex instanceof RegExp)
                state[i].regex = state[i].regex.source;

            // Count number of matching groups. 2 extra groups from the full match
            // and the catch-all on the end (used to force a match).
            var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2;

            // Replace any backreferences and offset appropriately: the rule's
            // groups are shifted by the groups of all preceding rules plus the
            // outer group wrapped around this rule.
            var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) {
                return "\\" + (parseInt(digit, 10) + matchTotal + 1);
            });

            if (matchcount > 1 && state[i].token.length !== matchcount - 1)
                throw new Error("For " + state[i].regex + " the matching groups and length of the token array don't match (rule #" + i + " of state " + key + ")");

            mapping[matchTotal] = {
                rule: i,
                len: matchcount
            };
            matchTotal += matchcount;
            ruleRegExps.push(adjustedregex);
        }

        this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", flag);
    }
};

(function() {
    /**
     * Splits one line into tokens, starting in `startState`.
     *
     * Every returned token carries, besides `type` and `value`, debugging
     * data: `state` (the current state at the moment the token was created,
     * i.e. after a `next` transition of the rule that produced it),
     * `mergeable` (whether the token has the same type as the previous one
     * and the rule allows merging; tokens are never actually merged here) and
     * `stateTransitions` (a list of "state@index" strings recorded while the
     * token was the current one).
     *
     * @param {string} line text of the line to tokenize
     * @param {string} [startState] state to start in; defaults to "start"
     * @returns {{tokens: Array, state: string}} the tokens of the line and
     *     the state the tokenizer ended in
     * @throws {Error} if the start state or a rule's `next` state does not
     *     exist, or if more than 10000 rule matches happen on one line
     *     (treated as an infinite loop)
     */
    this.getLineTokens = function(line, startState) {
        var currentState = startState || "start";
        var state = this.rules[currentState];
        var mapping = this.matchMappings[currentState];
        var re = this.regExps[currentState];
        if (re === undefined)
            throw new Error("The start state " + currentState + " doesn't exist!");
        // The combined expressions are global and shared between calls.
        re.lastIndex = 0;

        var match, tokens = [];
        var lastIndex = 0;
        var stateTransitions = [];

        // Log the state the tokenizer is in now (not the one it started in).
        function onStateChange() {
            stateTransitions.push(currentState + "@" + lastIndex);
        }
        // The first call appends a closing entry to the outgoing list (which
        // may already be attached to a finished token); then a fresh list is
        // started for the next token.
        function initState() {
            onStateChange();
            stateTransitions = [];
            onStateChange();
        }

        var token = {
            type: null,
            value: "",
            state: currentState
        };
        initState();

        var maxRecur = 10000;
        while (match = re.exec(line)) {
            var type = "default.text";
            var rule = null;
            var value = [match[0]];

            // Find the rule whose outer group took part in the match; if none
            // did, the catch-all matched and the defaults above are used.
            for (var i = 0; i < match.length - 2; i++) {
                if (match[i + 1] === undefined)
                    continue;

                if (!maxRecur--)
                    throw new Error("Infinite loop detected in state " + currentState + " (rule #" + mapping[i].rule + ")");

                rule = state[mapping[i].rule];

                // Rules with inner groups yield one value per inner group.
                if (mapping[i].len > 1)
                    value = match.slice(i + 2, i + 1 + mapping[i].len);

                // compute token type
                if (typeof rule.token === "function")
                    type = rule.token.apply(this, value);
                else
                    type = rule.token;

                if (rule.next) {
                    currentState = rule.next;
                    state = this.rules[currentState];
                    mapping = this.matchMappings[currentState];
                    // Continue in the new state's expression where this one stopped.
                    lastIndex = re.lastIndex;
                    re = this.regExps[currentState];
                    if (re === undefined) {
                        throw new Error("You indicated a state of " + rule.next + " to go to, but it doesn't exist!");
                    }
                    re.lastIndex = lastIndex;
                    onStateChange();
                }
                break;
            }

            if (value[0]) {
                if (typeof type === "string") {
                    value = [value.join("")];
                    type = [type];
                }
                for (var j = 0; j < value.length; j++) {
                    if (!value[j])
                        continue;

                    // Only recorded for inspection: this development version
                    // always emits a separate token.
                    var mergeable = (!rule || rule.merge || type[j] === "text") && token.type === type[j];

                    if (token.type) {
                        token.stateTransitions = stateTransitions;
                        tokens.push(token);
                        initState();
                    }
                    token = {
                        type: type[j],
                        value: value[j],
                        state: currentState,
                        mergeable: mergeable
                    };
                }
            }

            // lastIndex still holds the end of the previous match here (or of
            // this one after a state change); stop once that was the line end.
            if (lastIndex === line.length)
                break;
            lastIndex = re.lastIndex;
        }

        if (token.type) {
            token.stateTransitions = stateTransitions;
            tokens.push(token);
        }

        return {
            tokens : tokens,
            state : currentState
        };
    };
}).call(Tokenizer.prototype);
exports.Tokenizer = Tokenizer;
});