fix infinite loop in soy mode and make tokenizer more robust

This commit is contained in:
nightwing 2014-11-05 16:42:39 +04:00
commit 70337ea5ba
3 changed files with 55 additions and 11 deletions

View file

@ -188,7 +188,7 @@ var SoyTemplateHighlightRules = function() {
[ 'entity.other.attribute-name.soy',
'text',
'keyword.operator.soy' ],
regex: '\\b([\\w]*)(\\s*)((?::)?)' },
regex: '\\b([\\w]+)(\\s*)((?::)?)' },
{ defaultToken: 'meta.tag.param.soy' } ] } ],
'#primitive':
[ { token: 'constant.language.soy',

View file

@ -32,7 +32,7 @@ define(function(require, exports, module) {
"use strict";
// tokenizing lines longer than this makes editor very slow
var MAX_TOKEN_COUNT = 1000;
var MAX_TOKEN_COUNT = 2000;
/**
* This class takes a set of highlighting rules, and creates a tokenizer out of them. For more information, see [the wiki on extending highlighters](https://github.com/ajaxorg/ace/wiki/Creating-or-Extending-an-Edit-Mode#wiki-extendingTheHighlighter).
* @class Tokenizer
@ -77,9 +77,11 @@ var Tokenizer = function(rules) {
if (rule.token.length == 1 || matchcount == 1) {
rule.token = rule.token[0];
} else if (matchcount - 1 != rule.token.length) {
throw new Error("number of classes and regexp groups in '" +
rule.token + "'\n'" + rule.regex + "' doesn't match\n"
+ (matchcount - 1) + "!=" + rule.token.length);
this.reportError("number of classes and regexp groups doesn't match", {
rule: rule,
groupCount: matchcount - 1
});
rule.token = rule.token[0];
} else {
rule.tokenArray = rule.token;
rule.token = null;
@ -240,6 +242,7 @@ var Tokenizer = function(rules) {
var match, tokens = [];
var lastIndex = 0;
var matchAttempts = 0;
var token = {type: null, value: ""};
@ -280,7 +283,7 @@ var Tokenizer = function(rules) {
state = this.states[currentState];
if (!state) {
window.console && console.error && console.error(currentState, "doesn't exist");
this.reportError("state doesn't exist", currentState);
currentState = "start";
state = this.states[currentState];
}
@ -293,7 +296,7 @@ var Tokenizer = function(rules) {
}
if (value) {
if (typeof type == "string") {
if (typeof type === "string") {
if ((!rule || rule.merge !== false) && token.type === type) {
token.value += value;
} else {
@ -315,7 +318,13 @@ var Tokenizer = function(rules) {
lastIndex = index;
if (tokens.length > MAX_TOKEN_COUNT) {
if (matchAttempts++ > MAX_TOKEN_COUNT) {
if (matchAttempts > 2 * line.length) {
this.reportError("infinite loop in ace tokenizer", {
startState: startState,
line: line
});
}
// chrome doesn't show contents of text nodes with very long text
while (lastIndex < line.length) {
if (token.type)
@ -343,7 +352,14 @@ var Tokenizer = function(rules) {
state : stack.length ? stack : currentState
};
};
this.reportError = function(msg, data) {
var e = new Error(msg);
e.data = data;
if (typeof console == "object" && console.error)
console.error(e);
setTimeout(function() { throw e; });
};
}).call(Tokenizer.prototype);
exports.Tokenizer = Tokenizer;

View file

@ -29,7 +29,7 @@
* ***** END LICENSE BLOCK ***** */
if (typeof process !== "undefined") {
require("amd-loader");
require("amd-loader");
}
define(function(require, exports, module) {
@ -59,7 +59,35 @@ module.exports = {
var t = new Tokenizer({});
var re = t.removeCapturingGroups("(ax(by))[()]");
assert.equal(re, "(?:ax(?:by))[()]");
}
},
"test: broken highlight rules": function() {
var t = new Tokenizer({
start: [{
token: 's',
regex: '&&&|^^^'
}, {
defaultToken: "def"
}],
state1: [{
token: 'x',
regex: /\b([\w]*)(\s*)((?::)?)/
}]
});
var errorReports = 0;
t.reportError = function() { errorReports++; };
var tokens = t.getLineTokens("x|", "start");
assert.deepEqual(tokens, {
tokens: [{value: 'x|', type: 'overflow'}],
state: 'start'
});
var tokens = t.getLineTokens("x|", "state1");
assert.deepEqual(tokens, {
tokens: [{value: 'x', type: 'x'}, {value: '|', type: 'overflow'}],
state: 'start'
});
assert.equal(errorReports, 2);
},
};
});