From b837cd547b62292b88e4f6537c867fde0b7eb040 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Mon, 25 Jul 2011 17:32:17 +0200 Subject: [PATCH] highlight js identifiers with unicode characters. fix #148 --- lib/ace/edit_session.js | 6 +++--- lib/ace/mode/javascript_highlight_rules.js | 12 +++++++++--- lib/ace/mode/javascript_tokenizer_test.js | 5 +++++ lib/ace/mode/text.js | 15 +++++++++++++++ lib/ace/selection_test.js | 20 ++++++++++++++++++-- 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/lib/ace/edit_session.js b/lib/ace/edit_session.js index 58c19e74..f9e81f81 100644 --- a/lib/ace/edit_session.js +++ b/lib/ace/edit_session.js @@ -384,9 +384,6 @@ var EditSession = function(text, mode) { } }; - this.tokenRe = /^[\w\d]+/g; - this.nonTokenRe = /^(?:[^\w\d]|[\u3040-\u309F]|[\u30A0-\u30FF]|[\u4E00-\u9FFF\uF900-\uFAFF\u3400-\u4DBF])+/g; - this.getWordRange = function(row, column) { var line = this.getLine(row); @@ -477,6 +474,9 @@ var EditSession = function(text, mode) { this.bgTokenizer.setDocument(this.getDocument()); this.bgTokenizer.start(0); + + this.tokenRe = mode.tokenRe; + this.nonTokenRe = mode.nonTokenRe; this._dispatchEvent("changeMode"); }; diff --git a/lib/ace/mode/javascript_highlight_rules.js b/lib/ace/mode/javascript_highlight_rules.js index 14b1795c..a17edba3 100644 --- a/lib/ace/mode/javascript_highlight_rules.js +++ b/lib/ace/mode/javascript_highlight_rules.js @@ -40,6 +40,7 @@ define(function(require, exports, module) { var oop = require("pilot/oop"); var lang = require("pilot/lang"); +var regexp = require("ace/regexp"); var DocCommentHighlightRules = require("ace/mode/doc_comment_highlight_rules").DocCommentHighlightRules; var TextHighlightRules = require("ace/mode/text_highlight_rules").TextHighlightRules; @@ -60,6 +61,13 @@ var JavaScriptHighlightRules = function() { "public|interface|package|protected|static").split("|") ); + // TODO: Unicode escape sequences + var identifierRe = "[" + regexp.unicode.L + "\\$_][" + + regexp.unicode.L + + regexp.unicode.Mn + regexp.unicode.Mc + + regexp.unicode.Nd + + regexp.unicode.Pc + "\\$_]*\\b"; + // regexp must not have capturing parentheses. Use (?:) instead. // regexps are ordered -> the first match is used @@ -115,9 +123,7 @@ var JavaScriptHighlightRules = function() { else return "identifier"; }, - // TODO: Unicode escape sequences - // TODO: Unicode identifiers - regex : "[a-zA-Z_$][a-zA-Z0-9_$]*\\b" + regex : identifierRe }, { token : "keyword.operator", regex : "!|\\$|%|&|\\*|\\-\\-|\\-|\\+\\+|\\+|~|===|==|=|!=|!==|<=|>=|<<=|>>=|>>>=|<>|<|>|!|&&|\\|\\||\\?\\:|\\*=|%=|\\+=|\\-=|&=|\\^=|\\b(?:in|instanceof|new|delete|typeof|void)" diff --git a/lib/ace/mode/javascript_tokenizer_test.js b/lib/ace/mode/javascript_tokenizer_test.js index 76b33d3d..cc630fc5 100644 --- a/lib/ace/mode/javascript_tokenizer_test.js +++ b/lib/ace/mode/javascript_tokenizer_test.js @@ -107,6 +107,11 @@ module.exports = { "test tokenize regular expressions": function() { var tokens = this.tokenizer.getLineTokens("a/b/c", "start").tokens; assert.equal(5, tokens.length); + }, + + "test tokenize identifier with umlauts": function() { + var tokens = this.tokenizer.getLineTokens("füße", "start").tokens; + assert.equal(1, tokens.length); } }; diff --git a/lib/ace/mode/text.js b/lib/ace/mode/text.js index 40e4f506..1693dffe 100644 --- a/lib/ace/mode/text.js +++ b/lib/ace/mode/text.js @@ -43,6 +43,7 @@ define(function(require, exports, module) { var Tokenizer = require("ace/tokenizer").Tokenizer; var TextHighlightRules = require("ace/mode/text_highlight_rules").TextHighlightRules; var Behaviour = require("ace/mode/behaviour").Behaviour; +var regexp = require("ace/regexp"); var Mode = function() { this.$tokenizer = new Tokenizer(new TextHighlightRules().getRules()); @@ -51,6 +52,20 @@ var Mode = function() { (function() { + this.tokenRe = new RegExp("^[" + + regexp.unicode.L + + regexp.unicode.Mn + regexp.unicode.Mc + + regexp.unicode.Nd + + regexp.unicode.Pc + "\\$_]+", "g" + ); + + this.nonTokenRe = new RegExp("^(?:[^" + + regexp.unicode.L + + regexp.unicode.Mn + regexp.unicode.Mc + + regexp.unicode.Nd + + regexp.unicode.Pc + "\\$_]|\s])+", "g" + ); + this.getTokenizer = function() { return this.$tokenizer; }; diff --git a/lib/ace/selection_test.js b/lib/ace/selection_test.js index 30295da9..335b49db 100644 --- a/lib/ace/selection_test.js +++ b/lib/ace/selection_test.js @@ -144,8 +144,12 @@ module.exports = { }, "test: moveCursor word left" : function() { - var session = new EditSession( ["ab", - " Juhu Kinners (abc, 12)", " cde"].join("\n")); + var session = new EditSession([ + "ab", + " Juhu Kinners (abc, 12)", + " cde" + ].join("\n")); + var selection = session.getSelection(); selection.moveCursorDown(); @@ -183,6 +187,18 @@ module.exports = { selection.moveCursorWordLeft(); assert.position(selection.getCursor(), 0, 2); }, + + "test: moveCursor word left" : function() { + var session = new EditSession(" Fuß Füße"); + + var selection = session.getSelection(); + selection.moveCursorTo(0, 9) + selection.moveCursorWordLeft(); + assert.position(selection.getCursor(), 0, 5); + + selection.moveCursorWordLeft(); + assert.position(selection.getCursor(), 0, 4); + }, "test: select word left if cursor in word" : function() { var session = new EditSession("Juhu Kinners");