improve bgTokenizer

This commit is contained in:
nightwing 2012-05-24 22:07:27 +04:00
commit 3adcd1e5f8
10 changed files with 103 additions and 108 deletions

View file

@ -41,6 +41,9 @@ define(function(require, exports, module) {
var oop = require("./lib/oop");
var EventEmitter = require("./lib/event_emitter").EventEmitter;
// tokenizing lines longer than this makes the editor very slow
var MAX_LINE_LENGTH = 5000;
/**
* class BackgroundTokenizer
*
@ -59,8 +62,9 @@ var EventEmitter = require("./lib/event_emitter").EventEmitter;
**/
var BackgroundTokenizer = function(tokenizer, editor) {
this.running = false;
this.running = false;
this.lines = [];
this.states = [];
this.currentLine = 0;
this.tokenizer = tokenizer;
@ -77,11 +81,12 @@ var BackgroundTokenizer = function(tokenizer, editor) {
var len = doc.getLength();
while (self.currentLine < len) {
self.lines[self.currentLine] = self.$tokenizeRows(self.currentLine, self.currentLine)[0];
self.currentLine++;
self.$tokenizeRow(self.currentLine);
while (self.lines[self.currentLine])
self.currentLine++;
// only check every 5 lines
processedLines += 1;
processedLines ++;
if ((processedLines % 5 == 0) && (new Date() - workerStart) > 20) {
self.fireUpdateEvent(startLine, self.currentLine-1);
self.running = setTimeout(self.$worker, 20);
@ -106,10 +111,10 @@ var BackgroundTokenizer = function(tokenizer, editor) {
* Sets a new tokenizer for this object.
*
**/
this.setTokenizer = function(tokenizer) {
this.tokenizer = tokenizer;
// Throw away the per-row token and state caches: they were produced by the
// tokenizer that is being replaced.
this.lines = [];
this.states = [];
// Schedule background tokenizing again, beginning at the first row.
this.start(0);
};
@ -121,10 +126,10 @@ var BackgroundTokenizer = function(tokenizer, editor) {
* Sets a new document to associate with this object.
*
**/
this.setDocument = function(doc) {
this.doc = doc;
// The cached tokens and states describe the previous document, so discard them.
this.lines = [];
this.states = [];
// Cancel any pending worker run. Note that tokenizing is not restarted here;
// a later call to start() is needed for that.
this.stop();
};
@ -137,7 +142,6 @@ var BackgroundTokenizer = function(tokenizer, editor) {
* Emits the `'update'` event. `firstRow` and `lastRow` are used to define the boundaries of the region to be updated.
*
**/
this.fireUpdateEvent = function(firstRow, lastRow) {
var data = {
first: firstRow,
@ -153,13 +157,36 @@ var BackgroundTokenizer = function(tokenizer, editor) {
* Starts tokenizing at the row indicated.
*
**/
this.start = function(startRow) {
// Pick the row to resume from: the lowest of the requested row (0 when
// omitted), the current position and the document length, so the resume
// point can only move backwards.
this.currentLine = Math.min(startRow || 0, this.currentLine,
this.doc.getLength());
// NOTE(review): the next statement repeats the assignment above (same
// expression, written on one line). It is harmless but redundant — confirm
// and keep only one of the two.
this.currentLine = Math.min(startRow || 0, this.currentLine, this.doc.getLength());
// remove all cached items from this line onward
this.lines.splice(this.currentLine, this.lines.length);
this.states.splice(this.currentLine, this.states.length);
// cancel a pending worker run before scheduling a new one
this.stop();
// pretty long delay to prevent the tokenizer from interfering with the user
this.running = setTimeout(this.$worker, 700);
};
this.$updateOnChange = function(delta) {
var range = delta.range;
var startRow = range.start.row;
var len = range.end.row - startRow;
if (len === 0) {
this.lines[startRow] = null;
} else if (delta.action == "removeText" || delta.action == "removeLines") {
this.lines.splice(startRow, len + 1, null);
this.states.splice(startRow, len + 1, null);
} else {
var args = Array(len + 1);
args.unshift(startRow, 1);
this.lines.splice.apply(this.lines, args);
this.states.splice.apply(this.states, args);
}
this.currentLine = Math.min(startRow, this.currentLine, this.doc.getLength());
this.stop();
// pretty long delay to prevent the tokenizer from interfering with the user
@ -172,7 +199,6 @@ var BackgroundTokenizer = function(tokenizer, editor) {
* Stops tokenizing.
*
**/
this.stop = function() {
if (this.running)
clearTimeout(this.running);
@ -187,71 +213,49 @@ var BackgroundTokenizer = function(tokenizer, editor) {
* Starts tokenizing at the row indicated. Returns a list of objects of the tokenized rows.
*
**/
this.getTokens = function(firstRow, lastRow) {
return this.$tokenizeRows(firstRow, lastRow);
};
/**
* BackgroundTokenizer.getState(row) -> String
* - row (Number): The row to start at
*
* [Returns the state of tokenization for a row.]{: #BackgroundTokenizer.getState}
*
**/
this.getState = function(row) {
return this.$tokenizeRows(row, row)[0].state;
this.getTokens = function(row) {
return this.lines[row] || this.$tokenizeRow(row);
};
/**
* BackgroundTokenizer.$tokenizeRows(firstRow, lastRow) -> [Object]
* - startRow (Number): The row to start at
* - lastRow (Number): The row to finish at
* + ([Object]): A list of the tokenized rows. Each item in the list is an object with two properties, `state` and `start`.
*
* Tokenizes all the rows within the specified region.
*
* BackgroundTokenizer.getState(row) -> String
* - row (Number): The row to start at
*
* [Returns the state of tokenization at the end of a row.]{: #BackgroundTokenizer.getState}
**/
this.$tokenizeRows = function(firstRow, lastRow) {
if (!this.doc || isNaN(firstRow) || isNaN(lastRow))
return [{'state':'start','tokens':[]}];
var rows = [];
this.getState = function(row) {
// When the requested row is the one the tokenizer is currently positioned
// on, tokenize it first; $tokenizeRow stores the resulting state in
// this.states[row]. (Presumably that row has not been processed yet —
// verify against $worker.)
if (this.currentLine == row)
this.$tokenizeRow(row);
// Rows without a recorded state (undefined or null) report "start".
return this.states[row] || "start";
};
// determine start state
var state = "start";
var doCache = false;
if (firstRow > 0 && this.lines[firstRow - 1]) {
state = this.lines[firstRow - 1].state;
doCache = true;
} else if (firstRow == 0) {
state = "start";
doCache = true;
} else if (this.lines.length > 0) {
// Guess that we haven't changed state.
state = this.lines[this.lines.length-1].state;
this.$tokenizeRow = function(row) {
var line = this.doc.getLine(row);
var state = this.states[row - 1];
if (line.length > MAX_LINE_LENGTH) {
var overflow = {value: line.substr(MAX_LINE_LENGTH), type: "text"};
line = line.slice(0, MAX_LINE_LENGTH);
}
var data = this.tokenizer.getLineTokens(line, state);
if (overflow) {
data.tokens.push(overflow);
data.state = null;
}
var lines = this.doc.getLines(firstRow, lastRow);
for (var row=firstRow; row<=lastRow; row++) {
if (!this.lines[row]) {
var tokens = this.tokenizer.getLineTokens(lines[row-firstRow] || "", state);
var state = tokens.state;
rows.push(tokens);
if (data.state == "start" && this.states[row] == null)
this.states[row] = "start";
if (doCache) {
this.lines[row] = tokens;
}
}
else {
var tokens = this.lines[row];
state = tokens.state;
rows.push(tokens);
}
if (this.states[row] !== data.state) {
this.states[row] = data.state;
this.lines[row + 1] = null;
if (this.currentLine > row + 1)
this.currentLine = row + 1;
} else if (this.currentLine == row) {
this.currentLine = row + 1;
}
return rows;
return this.lines[row] = data.tokens;
};
}).call(BackgroundTokenizer.prototype);

View file

@ -188,7 +188,7 @@ var EditSession = function(text, mode) {
this.$informUndoManager.schedule();
}
this.bgTokenizer.start(delta.range.start.row);
this.bgTokenizer.$updateOnChange(delta);
this._emit("change", e);
};
@ -249,15 +249,14 @@ var EditSession = function(text, mode) {
};
/** related to: BackgroundTokenizer.getTokens
* EditSession.getTokens(firstRow, lastRow) -> Array
* - firstRow (Number): The row to start at
* - lastRow (Number): The row to finish at
* EditSession.getTokens(row) -> Array
* - row (Number): The row to start at
*
* Starts tokenizing at the row indicated. Returns a list of objects of the tokenized rows.
*
**/
this.getTokens = function(firstRow, lastRow) {
return this.bgTokenizer.getTokens(firstRow, lastRow);
this.getTokens = function(row) {
return this.bgTokenizer.getTokens(row);
};
/**
@ -268,7 +267,7 @@ var EditSession = function(text, mode) {
* Returns an array of tokens at the indicated row and column.
**/
this.getTokenAt = function(row, column) {
var tokens = this.bgTokenizer.getTokens(row, row)[0].tokens;
var tokens = this.bgTokenizer.getTokens(row);
var token, c = 0;
if (column == null) {
i = tokens.length - 1;

View file

@ -70,10 +70,9 @@ exports.render = function(input, mode, theme, lineStart) {
var stringBuilder = [];
var length = session.getLength();
var tokens = session.getTokens(0, length - 1);
for(var ix = 0; ix < length; ix++) {
var lineTokens = tokens[ix].tokens;
var lineTokens = session.getTokens(ix);
stringBuilder.push("<div class='ace_line'>");
stringBuilder.push("<span class='ace_gutter ace_gutter-cell' unselectable='on'>" + (ix + lineStart) + "</span>");
textLayer.$renderLine(stringBuilder, 0, lineTokens, true);

View file

@ -259,8 +259,8 @@ var Text = function(parentEl) {
continue;
var html = [];
var tokens = this.session.getTokens(i, i);
this.$renderLine(html, i, tokens[0].tokens, !this.$useLineGroups());
var tokens = this.session.getTokens(i);
this.$renderLine(html, i, tokens, !this.$useLineGroups());
lineElement = dom.setInnerHtml(lineElement, html.join(""));
i = this.session.getRowFoldEnd(i);
@ -321,11 +321,8 @@ var Text = function(parentEl) {
var html = [];
// Get the tokens per line as there might be some lines in between
// being folded.
// OPTIMIZE: If there is a long block of unfolded lines, just make
// this call once for that big block of unfolded lines.
var tokens = this.session.getTokens(row, row);
if (tokens.length == 1)
this.$renderLine(html, row, tokens[0].tokens, false);
var tokens = this.session.getTokens(row);
this.$renderLine(html, row, tokens, false);
// don't use setInnerHtml since we are working with an empty DIV
container.innerHTML = html.join("");
@ -368,11 +365,8 @@ var Text = function(parentEl) {
// Get the tokens per line as there might be some lines in between
// being folded.
// OPTIMIZE: If there is a long block of unfolded lines, just make
// this call once for that big block of unfolded lines.
var tokens = this.session.getTokens(row, row);
if (tokens.length == 1)
this.$renderLine(html, row, tokens[0].tokens, false);
var tokens = this.session.getTokens(row);
this.$renderLine(html, row, tokens, false);
if (this.$useLineGroups())
html.push("</div>"); // end the line group
@ -567,12 +561,11 @@ var Text = function(parentEl) {
value: placeholder
});
} else {
if (isNewRow) {
tokens = this.session.getTokens(row, row)[0].tokens;
}
if (tokens.length != 0) {
if (isNewRow)
tokens = this.session.getTokens(row);
if (tokens.length)
addTokens(tokens, lastColumn, column);
}
}
}.bind(this), foldLine.end.row, this.session.getLine(foldLine.end.row).length);

View file

@ -68,13 +68,13 @@ module.exports = {
// row with hard tabs
var row = 0;
var tokens = this.session.getTokens(row, row)[0].tokens;
var tokens = this.session.getTokens(row);
var stringBuilder = [];
this.textLayer.$renderLine(stringBuilder, row, tokens);
// row with soft tabs
row = 1;
tokens = this.session.getTokens(row, row)[0].tokens;
tokens = this.session.getTokens(row);
var stringBuilder2 = [];
this.textLayer.$renderLine(stringBuilder2, row, tokens);
assert.equal(stringBuilder.join(""), stringBuilder2.join(""));
@ -83,7 +83,7 @@ module.exports = {
"test rendering width of ideographic space (U+3000)" : function() {
this.session.setValue("\u3000");
var tokens = this.session.getTokens(0, 0)[0].tokens;
var tokens = this.session.getTokens(0);
var stringBuilder = [];
this.textLayer.$renderLine(stringBuilder, 0, tokens, true);
assert.equal(stringBuilder.join(""), "<span class='ace_cjk' style='width:20px'></span>");

View file

@ -168,7 +168,7 @@ var CstyleBehaviour = function () {
}
// Find what token we're inside.
var tokens = session.getTokens(selection.start.row, selection.start.row)[0].tokens;
var tokens = session.getTokens(selection.start.row);
var col = 0, token;
var quotepos = -1; // Track whether we're inside an open quote.

View file

@ -71,7 +71,7 @@ oop.inherits(FoldMode, BaseFoldMode);
};
this._getFirstTagInLine = function(session, row) {
var tokens = session.getTokens(row, row)[0].tokens;
var tokens = session.getTokens(row);
var value = "";
for (var i = 0; i < tokens.length; i++) {
var token = tokens[i];

View file

@ -58,7 +58,7 @@ define(function(require, exports, module) {
var TokenIterator = function(session, initialRow, initialColumn) {
this.$session = session;
this.$row = initialRow;
this.$rowTokens = session.getTokens(initialRow, initialRow)[0].tokens;
this.$rowTokens = session.getTokens(initialRow);
var token = session.getTokenAt(initialRow, initialColumn);
this.$tokenIndex = token ? token.index : -1;
@ -82,7 +82,7 @@ var TokenIterator = function(session, initialRow, initialColumn) {
return null;
}
this.$rowTokens = this.$session.getTokens(this.$row, this.$row)[0].tokens;
this.$rowTokens = this.$session.getTokens(this.$row);
this.$tokenIndex = this.$rowTokens.length - 1;
}
@ -105,7 +105,7 @@ var TokenIterator = function(session, initialRow, initialColumn) {
return null;
}
this.$rowTokens = this.$session.getTokens(this.$row, this.$row)[0].tokens;
this.$rowTokens = this.$session.getTokens(this.$row);
this.$tokenIndex = 0;
}

View file

@ -145,10 +145,10 @@ module.exports = {
];
var session = new EditSession(lines.join("\n"), new JavaScriptMode());
var rows = session.getTokens(0, lines.length-1);
var tokens = [];
for (var i = 0; i < rows.length; i++)
tokens = tokens.concat(rows[i].tokens);
var len = session.getLength();
for (var i = 0; i < len; i++)
tokens = tokens.concat(session.getTokens(i));
var iterator = new TokenIterator(session, 0, 0);
for (var i = 1; i < tokens.length; i++)
@ -167,10 +167,10 @@ module.exports = {
];
var session = new EditSession(lines.join("\n"), new JavaScriptMode());
var rows = session.getTokens(0, lines.length-1);
var tokens = [];
for (var i = 0; i < rows.length; i++)
tokens = tokens.concat(rows[i].tokens);
var len = session.getLength();
for (var i = 0; i < len; i++)
tokens = tokens.concat(session.getTokens(i));
var iterator = new TokenIterator(session, 4, 0);
for (var i = tokens.length-2; i >= 0; i--)

View file

@ -100,11 +100,11 @@ var Tokenizer = function(rules, flag) {
/**
* Tokenizer.getLineTokens() -> Object
*
*
* Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state.
**/
this.getLineTokens = function(line, startState) {
var currentState = startState;
var currentState = startState || "start";
var state = this.rules[currentState];
var mapping = this.matchMappings[currentState];
var re = this.regExps[currentState];