From 91f4d87cf24df634075588577d642c37357d0751 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 6 Apr 2012 12:02:08 +0200 Subject: [PATCH] some tokenizer fixes --- lib/ace/mode/javascript_highlight_rules.js | 95 +++++++++++++++++++--- lib/ace/tokenizer.js | 55 +++++++------ 2 files changed, 113 insertions(+), 37 deletions(-) diff --git a/lib/ace/mode/javascript_highlight_rules.js b/lib/ace/mode/javascript_highlight_rules.js index b6a4c8ae..9d8782f7 100644 --- a/lib/ace/mode/javascript_highlight_rules.js +++ b/lib/ace/mode/javascript_highlight_rules.js @@ -137,25 +137,96 @@ var JavaScriptHighlightRules = function() { token : "constant.numeric", // float regex : "[+-]?\\d+(?:(?:\\.\\d*)?(?:[eE][+-]?\\d+)?)?\\b" }, { // match stuff like: Sound.prototype.play = function() { } - token : ["storage.type", "punctuation.operator", "support.function", "punctuation.operator", "entity.name.function", "text", "keyword.operator", "text", "storage.type", "text", "paren.lparen", "variable.parameter", "paren.rparen"], - regex : "(" + identifierRe + ")(\\.)(prototype)(\\.)(" + identifierRe +")(\\s*)(=)(\\s*)(function)?(\\s*)(\\()(.*?)(\\))" + token : [ + "storage.type", + "punctuation.operator", + "support.function", + "punctuation.operator", + "entity.name.function", + "text", + "keyword.operator", + "text", + "storage.type", + "text", + "paren.lparen", + "variable.parameter", + "paren.rparen" + ], + regex : "(" + identifierRe + ")(\\.)(prototype)(\\.)(" + identifierRe +")(\\s*)(=)(\\s*)(function)(\\s*)(\\()(.*?)(\\))" }, { // match stuff like: Sound.prototype.play = myfunc - token : ["storage.type", "punctuation.operator", "support.function", "punctuation.operator", "entity.name.function", "text", "keyword.operator", "text"], + token : [ + "storage.type", + "punctuation.operator", + "support.function", + "punctuation.operator", + "entity.name.function", + "text", + "keyword.operator", + "text" + ], regex : "(" + identifierRe + ")(\\.)(prototype)(\\.)(" + identifierRe +")(\\s*)(=)(\\s*)" }, { // match stuff like: Sound.play = function() { } - token : ["storage.type", "punctuation.operator", "entity.name.function", "text", "keyword.operator", "text", "storage.type", "text", "paren.lparen", "variable.parameter", "paren.rparen"], - regex : "(" + identifierRe + ")(\\.)(" + identifierRe +")(\\s*)(=)(\\s*)(function)?(\\s*)(\\()(.*?)(\\))" + token : [ + "storage.type", + "punctuation.operator", + "entity.name.function", + "text", + "keyword.operator", + "text", + "storage.type", + "text", + "paren.lparen", + "variable.parameter", + "paren.rparen" + ], + regex : "(" + identifierRe + ")(\\.)(" + identifierRe +")(\\s*)(=)(\\s*)(function)(\\s*)(\\()(.*?)(\\))" }, { // match stuff like: play = function() { } - token : ["entity.name.function", "text", "keyword.operator", "text", "storage.type", "text", "paren.lparen", "variable.parameter", "paren.rparen"], - regex : "(" + identifierRe +")(\\s*)(=)(\\s*)(function)?(\\s*)(\\()(.*?)(\\))" + token : [ + "entity.name.function", + "text", + "keyword.operator", + "text", + "storage.type", + "text", + "paren.lparen", + "variable.parameter", + "paren.rparen" + ], + regex : "(" + identifierRe +")(\\s*)(=)(\\s*)(function)(\\s*)(\\()(.*?)(\\))" }, { // match regular function like: function myFunc(arg) { } - token : ["storage.type", "text", "entity.name.function", "text", "paren.lparen", "variable.parameter", "paren.rparen"], + token : [ + "storage.type", + "text", + "entity.name.function", + "text", + "paren.lparen", + "variable.parameter", + "paren.rparen" + ], regex : "(function)(\\s+)(" + identifierRe + ")(\\s*)(\\()(.*?)(\\))" }, { // match stuff like: foobar: function() { } - token : ["entity.name.function", "text", "punctuation.operator", "text", "storage.type", "text", "paren.lparen", "variable.parameter", "paren.rparen"], - regex : "(" + identifierRe + ")(\\s*)(:)(\\s*)(function)?(\\s*)(\\()(.*?)(\\))" + token : [ + "entity.name.function", + "text", + "punctuation.operator", + "text", + "storage.type", + "text", + "paren.lparen", + "variable.parameter", + "paren.rparen" + ], + regex : "(" + identifierRe + ")(\\s*)(:)(\\s*)(function)(\\s*)(\\()(.*?)(\\))" }, { // Attempt to match : function() { } (this is for issues with 'foo': function() { }) - token : ["text", "text", "storage.type", "text", "paren.lparen", "variable.parameter", "paren.rparen"], + token : [ + "text", + "text", + "storage.type", + "text", + "paren.lparen", + "variable.parameter", + "paren.rparen" + ], regex : "(:)(\\s*)(function)?(\\s*)(\\()([^)]*)(\\))" }, { token : "constant.language.boolean", @@ -171,7 +242,7 @@ var JavaScriptHighlightRules = function() { token : "support.function.dom", regex : "\\b(?:s(?:ub(?:stringData|mit)|plitText|e(?:t(?:NamedItem|Attribute(?:Node)?)|lect))|has(?:ChildNodes|Feature)|namedItem|c(?:l(?:ick|o(?:se|neNode))|reate(?:C(?:omment|DATASection|aption)|T(?:Head|extNode|Foot)|DocumentFragment|ProcessingInstruction|E(?:ntityReference|lement)|Attribute))|tabIndex|i(?:nsert(?:Row|Before|Cell|Data)|tem)|open|delete(?:Row|C(?:ell|aption)|T(?:Head|Foot)|Data)|focus|write(?:ln)?|a(?:dd|ppend(?:Child|Data))|re(?:set|place(?:Child|Data)|move(?:NamedItem|Child|Attribute(?:Node)?)?)|get(?:NamedItem|Element(?:sBy(?:Name|TagName)|ById)|Attribute(?:Node)?)|blur)\\b(?=\\()" }, { - token : "support.function.constant", + token : "support.constant", regex : "\\b(?:s(?:ystemLanguage|cr(?:ipts|ollbars|een(?:X|Y|Top|Left))|t(?:yle(?:Sheets)?|atus(?:Text|bar)?)|ibling(?:Below|Above)|ource|uffixes|e(?:curity(?:Policy)?|l(?:ection|f)))|h(?:istory|ost(?:name)?|as(?:h|Focus))|y|X(?:MLDocument|SLDocument)|n(?:ext|ame(?:space(?:s|URI)|Prop))|M(?:IN_VALUE|AX_VALUE)|c(?:haracterSet|o(?:n(?:structor|trollers)|okieEnabled|lorDepth|mp(?:onents|lete))|urrent|puClass|l(?:i(?:p(?:boardData)?|entInformation)|osed|asses)|alle(?:e|r)|rypto)|t(?:o(?:olbar|p)|ext(?:Transform|Indent|Decoration|Align)|ags)|SQRT(?:1_2|2)|i(?:n(?:ner(?:Height|Width)|put)|ds|gnoreCase)|zIndex|o(?:scpu|n(?:readystatechange|Line)|uter(?:Height|Width)|p(?:sProfile|ener)|ffscreenBuffering)|NEGATIVE_INFINITY|d(?:i(?:splay|alog(?:Height|Top|Width|Left|Arguments)|rectories)|e(?:scription|fault(?:Status|Ch(?:ecked|arset)|View)))|u(?:ser(?:Profile|Language|Agent)|n(?:iqueID|defined)|pdateInterval)|_content|p(?:ixelDepth|ort|ersonalbar|kcs11|l(?:ugins|atform)|a(?:thname|dding(?:Right|Bottom|Top|Left)|rent(?:Window|Layer)?|ge(?:X(?:Offset)?|Y(?:Offset)?))|r(?:o(?:to(?:col|type)|duct(?:Sub)?|mpter)|e(?:vious|fix)))|e(?:n(?:coding|abledPlugin)|x(?:ternal|pando)|mbeds)|v(?:isibility|endor(?:Sub)?|Linkcolor)|URLUnencoded|P(?:I|OSITIVE_INFINITY)|f(?:ilename|o(?:nt(?:Size|Family|Weight)|rmName)|rame(?:s|Element)|gColor)|E|whiteSpace|l(?:i(?:stStyleType|n(?:eHeight|kColor))|o(?:ca(?:tion(?:bar)?|lName)|wsrc)|e(?:ngth|ft(?:Context)?)|a(?:st(?:M(?:odified|atch)|Index|Paren)|yer(?:s|X)|nguage))|a(?:pp(?:MinorVersion|Name|Co(?:deName|re)|Version)|vail(?:Height|Top|Width|Left)|ll|r(?:ity|guments)|Linkcolor|bove)|r(?:ight(?:Context)?|e(?:sponse(?:XML|Text)|adyState))|global|x|m(?:imeTypes|ultiline|enubar|argin(?:Right|Bottom|Top|Left))|L(?:N(?:10|2)|OG(?:10E|2E))|b(?:o(?:ttom|rder(?:Width|RightWidth|BottomWidth|Style|Color|TopWidth|LeftWidth))|ufferDepth|elow|ackground(?:Color|Image)))\\b" }, { token : ["punctuation.operator", "support.function.firebug"], diff --git a/lib/ace/tokenizer.js b/lib/ace/tokenizer.js index af6d8cae..a471aa76 100644 --- a/lib/ace/tokenizer.js +++ b/lib/ace/tokenizer.js @@ -55,12 +55,15 @@ var Tokenizer = function(rules, flag) { // Count number of matching groups. 2 extra groups from the full match // And the catch-all on the end (used to force a match); var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a").length - 2; - + // Replace any backreferences and offset appropriately. var adjustedregex = state[i].regex.replace(/\\([0-9]+)/g, function (match, digit) { return "\\" + (parseInt(digit, 10) + matchTotal + 1); }); + if (matchcount > 1 && state[i].token.length !== matchcount-1) + throw new Error("Matching groups and length of the token array don't match"); + mapping[matchTotal] = { rule: i, len: matchcount @@ -98,45 +101,47 @@ var Tokenizer = function(rules, flag) { var value = [match[0]]; for (var i = 0; i < match.length-2; i++) { - if (match[i + 1] !== undefined) { - rule = state[mapping[i].rule]; + if (match[i + 1] === undefined) + continue; - if (mapping[i].len > 1) { - value = match.slice(i+2, i+1+mapping[i].len); - } - - // compute token type - if (typeof rule.token == "function") - type = rule.token.apply(this, value); - else - type = rule.token; + rule = state[mapping[i].rule]; + + if (mapping[i].len > 1) + value = match.slice(i+2, i+1+mapping[i].len); + + // compute token type + if (typeof rule.token == "function") + type = rule.token.apply(this, value); + else + type = rule.token; - var next = rule.next; - if (next && next !== currentState) { - currentState = next; - state = this.rules[currentState]; - mapping = this.matchMappings[currentState]; - lastIndex = re.lastIndex; + var next = rule.next; + if (next && next !== currentState) { + currentState = next; + state = this.rules[currentState]; + mapping = this.matchMappings[currentState]; + lastIndex = re.lastIndex; - re = this.regExps[currentState]; - re.lastIndex = lastIndex; - } - break; + re = this.regExps[currentState]; + re.lastIndex = lastIndex; } + break; } - + if (value[0]) { if (typeof type == "string") { value = [value.join("")]; type = [type]; } for (var i = 0; i < value.length; i++) { + if (!value[i]) + continue; + if ((!rule || rule.merge || type[i] === "text") && token.type === type[i]) { token.value += value[i]; } else { - if (token.type) { + if (token.type) tokens.push(token); - } token = { type: type[i],