From c477184f2a0e2a0e89a88b63e0569b1ca077a826 Mon Sep 17 00:00:00 2001 From: nightwing Date: Sat, 12 Jan 2013 20:31:42 +0400 Subject: [PATCH] update more highlighters --- lib/ace/mode/coffee_highlight_rules.js | 84 ++++------ lib/ace/mode/markdown_highlight_rules.js | 131 ++++++++------- lib/ace/mode/php_highlight_rules.js | 27 +--- lib/ace/mode/text_highlight_rules.js | 28 ++-- lib/ace/mode/xml_util.js | 2 +- lib/ace/tokenizer_dev.js | 197 ++++++++++++++--------- tool/mode_creator.js | 2 +- 7 files changed, 258 insertions(+), 213 deletions(-) diff --git a/lib/ace/mode/coffee_highlight_rules.js b/lib/ace/mode/coffee_highlight_rules.js index 22298451..7731da88 100644 --- a/lib/ace/mode/coffee_highlight_rules.js +++ b/lib/ace/mode/coffee_highlight_rules.js @@ -38,10 +38,6 @@ define(function(require, exports, module) { function CoffeeHighlightRules() { var identifier = "[$A-Za-z_\\x7f-\\uffff][$\\w\\x7f-\\uffff]*"; - var stringfill = { - token : "string", - regex : ".+" - }; var keywords = ( "this|throw|then|try|typeof|super|switch|return|break|by|continue|" + @@ -91,31 +87,50 @@ define(function(require, exports, module) { regex: /(\()((?:"[^")]*?"|'[^')]*?'|\/[^\/)]*?\/|[^()\"'\/])*?)(\))(\s*)([\-=]>)/ }; + var stringEscape = /\\(?:x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|[0-2][0-7]{0,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.)/; + this.$rules = { start : [ { token : "constant.numeric", regex : "(?:0x[\\da-fA-F]+|(?:\\d+(?:\\.\\d+)?|\\.\\d+)(?:[eE][+-]?\\d+)?)" }, { - token : "string", - regex : "'''", - next : "qdoc" + stateName: "qdoc", + token : "string", regex : "'''", next : [ + {token : "string", regex : "'''", next : "start"}, + {token : "constant.language.escape", regex : stringEscape}, + {defaultToken: "string"}, + ] }, { + stateName: "qqdoc", token : "string", regex : '"""', - next : "qqdoc" + next : [ + {token : "string", regex : '"""', next : "start"}, + {token : "constant.language.escape", regex : stringEscape}, + {defaultToken: "string"} + ] }, { - token : "string", - regex : "'", - next : "qstring" + stateName: "qstring", + token : "string", regex : "'", next : [ + {token : "string", regex : "'", next : "start"}, + {token : "constant.language.escape", regex : stringEscape}, + {defaultToken: "string"}, + ] }, { - token : "string", - regex : '"', - next : "qqstring" + stateName: "qqstring", + token : "string.start", regex : '"', next : [ + {token : "string.end", regex : '"', next : "start"}, + {token : "constant.language.escape", regex : stringEscape}, + {defaultToken: "string"}, + ] }, { - token : "string", - regex : "`", - next : "js" + stateName: "js", + token : "string", regex : "`", next : [ + {token : "string", regex : "`", next : "start"}, + {token : "constant.language.escape", regex : stringEscape}, + {defaultToken: "string"}, + ] }, { token : "string.regex", regex : "///", @@ -173,35 +188,6 @@ define(function(require, exports, module) { regex : "\\s+" }], - qdoc : [{ - token : "string", - regex : ".*?'''", - next : "start" - }, stringfill], - - qqdoc : [{ - token : "string", - regex : '.*?"""', - next : "start" - }, stringfill], - - qstring : [{ - token : "string", - regex : "[^\\\\']*(?:\\\\.[^\\\\']*)*'", - next : "start" - }, stringfill], - - qqstring : [{ - token : "string", - regex : '[^\\\\"]*(?:\\\\.[^\\\\"]*)*"', - next : "start" - }, stringfill], - - js : [{ - token : "string", - regex : "[^\\\\`]*(?:\\\\.[^\\\\`]*)*`", - next : "start" - }, stringfill], heregex : [{ token : "string.regex", @@ -217,13 +203,13 @@ define(function(require, exports, module) { comment : [{ token : "comment", - regex : '.*?###', + regex : '###', next : "start" }, { - token : "comment", - regex : ".+" + defaultToken : "comment", }] }; + this.normalizeRules(); } exports.CoffeeHighlightRules = CoffeeHighlightRules; diff --git a/lib/ace/mode/markdown_highlight_rules.js b/lib/ace/mode/markdown_highlight_rules.js index 3da5bd83..6a62d7e1 100644 --- a/lib/ace/mode/markdown_highlight_rules.js +++ b/lib/ace/mode/markdown_highlight_rules.js @@ -3,7 +3,7 @@ * * Copyright (c) 2010, Ajax.org B.V. * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright @@ -14,7 +14,7 @@ * * Neither the name of Ajax.org B.V. nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE @@ -52,38 +52,9 @@ var MarkdownHighlightRules = function() { // regexps are ordered -> the first match is used this.$rules = { - "start" : [ { - token : "empty_line", - regex : '^$' - }, { // code span ` + "basic" : [{ // code span ` token : "support.function", regex : "(`+)(.*?[^`])(\\1)" - }, { // code block - token : "support.function", - regex : "^[ ]{4}.+" - }, { // h1 - token: "markup.heading.1", - regex: "^=+(?=\\s*$)" - }, { // h2 - token: "markup.heading.2", - regex: "^\\-+(?=\\s*$)" - }, { // header - token : function(value) { - return "markup.heading." + value.search(/[^#]/); - }, - regex : "^#{1,6}(?:[^ #].*| +.*(?:[^ #].*|[^ ]+.* +#+ *))$" - }, github_embed("(?:javascript|js)", "js-"), - github_embed("xml", "xml-"), - github_embed("html", "html-"), - github_embed("css", "css-"), - { // Github style block - token : "support.function", - regex : "^```\\s*[a-zA-Z]*(?:{.*?\\})?\\s*$", - next : "githubblock" - }, { // block quote - token : "string", - regex : "^>[ ].+$", - next : "blockquote" }, { // reference token : ["text", "constant", "text", "url", "string", "text"], regex : "^([ ]{0,3}\\[)([^\\]]+)(\\]:\\s*)([^ ]+)(\\s*(?:[\"][^\"]+[\"])?(\\s*))$" @@ -98,46 +69,87 @@ var MarkdownHighlightRules = function() { "(?)"+ "((?:[ \t]*\"(?:.*?)\"[ \\t]*)?)"+ "(\\))" - }, { // HR * - token : "constant", - regex : "^[ ]{0,2}(?:[ ]?\\*[ ]?){3,}\\s*$" - }, { // HR - - token : "constant", - regex : "^[ ]{0,2}(?:[ ]?\\-[ ]?){3,}\\s*$" - }, { // HR _ - token : "constant", - regex : "^[ ]{0,2}(?:[ ]?\\_[ ]?){3,}\\s*$" - }, { // list - token : "markup.list", - regex : "^\\s{0,3}(?:[*+-]|\\d+\\.)\\s+", - next : "listblock" }, { // strong ** __ token : "string", regex : "([*]{2}|[_]{2}(?=\\S))(.*?\\S[*_]*)(\\1)" }, { // emphasis * _ token : "string", regex : "([*]|[_](?=\\S))(.*?\\S[*_]*)(\\1)" - }, { // + }, { // token : ["text", "url", "text"], regex : "(<)("+ "(?:https?|ftp|dict):[^'\">\\s]+"+ "|"+ "(?:mailto:)?[-.\\w]+\\@[-a-z0-9]+(?:\\.[-a-z0-9]+)*\\.[a-z]+"+ ")(>)" + }], + + // code block + "allowBlock": [ + {token : "support.function", regex : "^ {4}.+", next : "allowBlock"}, + {token : "empty", regex : "", next : "start"} + ], + + "start" : [{ + token : "empty_line", + regex : '^$', + next: "allowBlock" + }, { // h1 + token: "markup.heading.1", + regex: "^=+(?=\\s*$)" + }, { // h2 + token: "markup.heading.2", + regex: "^\\-+(?=\\s*$)" + }, { // header + token : function(value) { + return "markup.heading." + value.search(/[^#]/); + }, + regex : "^#{1,6}(?:[^ #].*| +.*(?:[^ #].*|[^ ]+.* +#+ *))$" + }, + github_embed("(?:javascript|js)", "js-"), + github_embed("xml", "xml-"), + github_embed("html", "html-"), + github_embed("css", "css-"), + { // Github style block + token : "support.function", + regex : "^```\\s*[a-zA-Z]*(?:{.*?\\})?\\s*$", + next : "githubblock" + }, { // block quote + token : "string", + regex : "^>[ ].+$", + next : "blockquote" + }, { // HR * - _ + token : "constant", + regex : "^ {0,2}(?:(?: ?\\* ?){3,}|(?: ?\\- ?){3,}|(?: ?\\_ ?){3,})\\s*$", + next: "allowBlock" + }, { // list + token : "markup.list", + regex : "^\\s{0,3}(?:[*+-]|\\d+\\.)\\s+", + next : "listblock-start" }, { - token : "text", - regex : "[^\\*_%$`\\[#<>]+" - } ], - + include : "basic" + }], + + "listblock-start" : [{ + token : "support.variable", + regex : /(?:\[[ x]\])?/, + next : "listblock" + }], + "listblock" : [ { // Lists only escape on completely blank lines. token : "empty_line", regex : "^$", next : "start" }, { + include : "basic", noEscape: true + }, { // list token : "markup.list", - regex : ".+" + regex : "^\\s{0,3}(?:[*+-]|\\d+\\.)\\s+", + next : "listblock-start" + }, { + defaultToken : "markup.list" } ], - + "blockquote" : [ { // BLockquotes only escape on blank lines. token : "empty_line", regex : "^\\s*$", @@ -146,7 +158,7 @@ var MarkdownHighlightRules = function() { token : "string", regex : ".+" } ], - + "githubblock" : [ { token : "support.function", regex : "^```", @@ -156,31 +168,31 @@ var MarkdownHighlightRules = function() { regex : ".+" } ] }; - + this.embedRules(JavaScriptHighlightRules, "js-", [{ token : "support.function", regex : "^```", next : "start" }]); - + this.embedRules(HtmlHighlightRules, "html-", [{ token : "support.function", regex : "^```", next : "start" }]); - + this.embedRules(CssHighlightRules, "css-", [{ token : "support.function", regex : "^```", next : "start" }]); - + this.embedRules(XmlHighlightRules, "xml-", [{ token : "support.function", regex : "^```", next : "start" }]); - + var html = new HtmlHighlightRules().getRules(); for (var i in html) { if (this.$rules[i]) @@ -188,7 +200,8 @@ var MarkdownHighlightRules = function() { else this.$rules[i] = html[i]; } - + + this.normalizeRules(); }; oop.inherits(MarkdownHighlightRules, TextHighlightRules); diff --git a/lib/ace/mode/php_highlight_rules.js b/lib/ace/mode/php_highlight_rules.js index 430fc0ec..b772f759 100644 --- a/lib/ace/mode/php_highlight_rules.js +++ b/lib/ace/mode/php_highlight_rules.js @@ -1030,31 +1030,18 @@ var PhpLangHighlightRules = function() { regex : '\\\\(?:[nrtvef\\\\"$]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})' }, { token : "constant.language.escape", - regex : /\$[\w\d]+(?:\[[\w\d]+\])?/ + regex : /\$[\w]+(?:\[[\w\]+]|=>\w+)?/ }, { token : "constant.language.escape", regex : /\$\{[^"\}]+\}?/ // this is wrong but ok for now - }, { - token : "string", - regex : '"', - next : "start" - }, { - token : "string", - regex : '.+?' - } + }, + {token : "string", regex : '"', next : "start"}, + {defaultToken : "string"} ], "qstring" : [ - { - token : "constant.language.escape", - regex : "\\\\['\\\\]" - }, { - token : "string", - regex : "'", - next : "start" - }, { - token : "string", - regex : ".+?" - } + {token : "constant.language.escape", regex : /\\['\\]/}, + {token : "string", regex : "'", next : "start"}, + {defaultToken : "string"} ] }; diff --git a/lib/ace/mode/text_highlight_rules.js b/lib/ace/mode/text_highlight_rules.js index 25a8bc87..2d11602c 100644 --- a/lib/ace/mode/text_highlight_rules.js +++ b/lib/ace/mode/text_highlight_rules.js @@ -99,24 +99,30 @@ var TextHighlightRules = function() { this.normalizeRules = function() { var id = 0; for (var key in this.$rules) { - var rule = this.$rules[key]; - for (var i = 0; i < rule.length; i++) { - var state = rule[i]; - if (state.next && Array.isArray(state.next)) { - var stateName = state.stateName || ("state" + id++); - this.$rules[stateName] = state.next; - state.next = stateName; + var state = this.$rules[key]; + for (var i = 0; i < state.length; i++) { + var rule = state[i]; + if (rule.next && Array.isArray(rule.next)) { + var stateName = rule.stateName || ("state" + id++); + this.$rules[stateName] = rule.next; + rule.next = stateName; } - if (state.rules) { - for (var r in state.rules) { + if (rule.rules) { + for (var r in rule.rules) { if (this.$rules[r]) { if (this.$rules[r].push) - this.$rules[r].push.apply(this.$rules[r], state.rules[r]); + this.$rules[r].push.apply(this.$rules[r], rule.rules[r]); } else { - this.$rules[r] = state.rules[r]; + this.$rules[r] = rule.rules[r]; } } } + if (rule.include || typeof rule == "string") { + var args = [i, 1].concat(this.$rules[rule.include || rule]); + if (rule.noEscape) + args = args.filter(function(x) {return !x.next;}); + state.splice.apply(state, args); + } } } }; diff --git a/lib/ace/mode/xml_util.js b/lib/ace/mode/xml_util.js index 781249f8..abae6075 100644 --- a/lib/ace/mode/xml_util.js +++ b/lib/ace/mode/xml_util.js @@ -50,7 +50,7 @@ function multiLineString(quote, state) { token : "constant.language.escape", regex : "(?:&#[0-9]+;)|(?:&#x[0-9a-fA-F]+;)|(?:&[a-zA-Z0-9_:\\.-]+;)" }, - {token : "string", regex : '\\w+|.|\\s+'} + {defaultToken : "string"} ]; } diff --git a/lib/ace/tokenizer_dev.js b/lib/ace/tokenizer_dev.js index 6022c4ae..2ec56692 100644 --- a/lib/ace/tokenizer_dev.js +++ b/lib/ace/tokenizer_dev.js @@ -37,51 +37,111 @@ define(function(require, exports, module) { **/ var Tokenizer = function(rules, flag) { flag = flag ? "g" + flag : "g"; - this.rules = rules; + this.states = rules; this.regExps = {}; this.matchMappings = {}; - for ( var key in this.rules) { - var rule = this.rules[key]; - var state = rule; + for (var key in this.states) { + var state = this.states[key]; var ruleRegExps = []; var matchTotal = 0; - var mapping = this.matchMappings[key] = {}; + var mapping = this.matchMappings[key] = {defaultToken: "default.text"}; - for ( var i = 0; i < state.length; i++) { - - if (state[i].regex instanceof RegExp) - state[i].regex = state[i].regex.toString().slice(1, -1); + for (var i = 0; i < state.length; i++) { + var rule = state[i]; + if (rule.defaultToken) { + mapping.defaultToken = rule.defaultToken; + continue; + } + if (rule.regex instanceof RegExp) + rule.regex = rule.regex.toString().slice(1, -1); // Count number of matching groups. 2 extra groups from the full match // And the catch-all on the end (used to force a match); - var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2; + var adjustedregex = rule.regex; + var matchcount = new RegExp("(?:(" + adjustedregex + ")|(.))").exec("a").length - 2; + if (Array.isArray(rule.token)) { + if (rule.token.length == 1) { + rule.token = rule.token[0]; + } else { + rule.tokenArray = rule.token; + rule.token = this.$arrayTokens; + } + } - // Replace any backreferences and offset appropriately. - var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) { - return "\\" + (parseInt(digit, 10) + matchTotal + 1); - }); + if (matchcount > 1) { + if (/\\\d/.test(rule.regex)) { + // Replace any backreferences and offset appropriately. + adjustedregex = rule.regex.replace(/\\([0-9]+)/g, function (match, digit) { + return "\\" + (parseInt(digit, 10) + matchTotal + 1); + }); + } else { + matchcount = 1; + adjustedregex = this.removeCapturingGroups(rule.regex); + } + if (!rule.splitRegex) + rule.splitRegex = this.createSplitterRegexp(rule.regex, flag); + } - if (matchcount > 1 && state[i].token.length !== matchcount-1) - throw new Error("For " + state[i].regex + " the matching groups and length of the token array don't match (rule #" + i + " of state " + key + ")"); - - mapping[matchTotal] = { - rule: i, - len: matchcount - }; + mapping[matchTotal] = i; matchTotal += matchcount; ruleRegExps.push(adjustedregex); } - this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", flag); + this.regExps[key] = new RegExp("(" + ruleRegExps.join(")|(") + ")|($)", flag); } }; (function() { + this.$arrayTokens = function(str) { + if (!str) + return []; + var values = str.split(this.splitRegex) + var tokens = []; + var types = this.tokenArray; + if (types.length != values.length - 2) { + if (window.console) + console.error(types.length , values.length - 2, str, this.splitRegex); + return [{type: "error.invalid", value: str}]; + } + for (var i = 0; i < types.length; i++) { + if (values[i + 1]) { + tokens[tokens.length] = { + type: types[i], + value: values[i + 1] + }; + } + } + return tokens; + }; + + this.removeCapturingGroups = function(src) { + var r = src.replace( + /\[(?:\\.|[^\]])*?\]|\\.|\(\?[:=!]|(\()/g, + function(x, y) {return y ? "(?:" : x;} + ); + return r; + }; + + this.createSplitterRegexp = function(src, flag) { + src = src.replace(/\(\?=([^()]|\\.)*?\)$/, ""); + return new RegExp(src, flag); + }; + + /** + * Returns an object containing two properties: `tokens`, which contains all the tokens; and `state`, the current state. + * @returns {Object} + **/ this.getLineTokens = function(line, startState) { + if (startState && typeof startState != "string") { + var stack = startState.slice(0); + startState = stack[0]; + } else + var stack = []; + var currentState = startState || "start"; - var state = this.rules[currentState]; + var state = this.states[currentState]; var mapping = this.matchMappings[currentState]; var re = this.regExps[currentState]; re.lastIndex = 0; @@ -110,92 +170,85 @@ var Tokenizer = function(rules, flag) { var maxRecur = 10000; while (match = re.exec(line)) { - var type = "default.text"; + var type = mapping.defaultToken; var rule = null; - var value = [match[0]]; + var value = match[0]; + var index = re.lastIndex; + + if (index - value.length > lastIndex) { + var skipped = line.substring(lastIndex, index - value.length); + if (token.type == type) { + token.value += skipped; + } else { + if (token.type) + tokens.push(token); + token = {type: type, value: skipped}; + } + } for (var i = 0; i < match.length-2; i++) { if (match[i + 1] === undefined) continue; if (!maxRecur--) { - throw "infinite" + mapping[i].rule + currentState + throw "infinite" + state[mapping[i]] + currentState } - rule = state[mapping[i].rule]; - - if (mapping[i].len > 1) - value = match.slice(i+2, i+1+mapping[i].len); + rule = state[mapping[i]]; // compute token type - if (typeof rule.token == "function") - type = rule.token.apply(this, value); - else - type = rule.token; + type = typeof rule.token == "function" + ? rule.token(value, currentState, stack) + : rule.token; if (rule.next) { currentState = rule.next; - state = this.rules[currentState]; - mapping = this.matchMappings[currentState]; - lastIndex = re.lastIndex; - - re = this.regExps[currentState]; - - if (re === undefined) { - throw new Error("You indicated a state of " + rule.next + " to go to, but it doesn't exist!"); + state = this.states[currentState]; + if (!state) { + window.console && console.error && console.error(currentState, "doesn't exist"); + currentState = "start"; + state = this.states[currentState]; } - - re.lastIndex = lastIndex; + mapping = this.matchMappings[currentState]; + lastIndex = index; + re = this.regExps[currentState]; + re.lastIndex = index; onStateChange(); } break; } - if (value[0]) { + if (value) { if (typeof type == "string") { - value = [value.join("")]; - type = [type]; - } - for (var i = 0; i < value.length; i++) { - if (!value[i]) - continue; - - var mergeable = (!rule || rule.merge || type[i] === "text") && token.type === type[i]; - - if (false && mergeable) { - token.value += value[i]; + if ((!rule || rule.merge !== false) && token.type === type) { + token.value += value; } else { - if (token.type) { - token.stateTransitions = stateTransitions; + if (token.type) tokens.push(token); - initState() - } - - token = { - type: type[i], - value: value[i], - state: currentState, - mergeable: mergeable - }; + token = {type: type, value: value}; } + } else { + if (token.type) + tokens.push(token); + token = {type: null, value: ""}; + for (var i = 0; i < type.length; i++) + tokens.push(type[i]); } } if (lastIndex == line.length) break; - lastIndex = re.lastIndex; + lastIndex = index; } - if (token.type) { - token.stateTransitions = stateTransitions; + if (token.type) tokens.push(token); - } return { tokens : tokens, - state : currentState + state : stack.length ? stack : currentState }; }; diff --git a/tool/mode_creator.js b/tool/mode_creator.js index 70ee3225..f87a9622 100644 --- a/tool/mode_creator.js +++ b/tool/mode_creator.js @@ -118,7 +118,7 @@ function run() { var path = "ace/mode/new"; var deps = getDeps(src, path); src = src.replace("define(", 'define("' + path +'", ["require","exports","module",' + deps +'],'); - src += ';require(["ace/mode/new"], continueRun, function(e){console.log(e);require.undef("ace/mode/new")})'; + src += ';require(["ace/mode/new"], continueRun, function(e){console.log(e);window.require.undef("ace/mode/new")})'; try { eval(src); } catch(e) {