From 24dd04d1babe99e9141ff7e8b075b8d496979379 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Fri, 9 Apr 2010 12:32:34 +0200 Subject: [PATCH] support multi line strings --- BackgroundTokenizer.js | 54 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/BackgroundTokenizer.js b/BackgroundTokenizer.js index 8b3d73b3..ead891c4 100644 --- a/BackgroundTokenizer.js +++ b/BackgroundTokenizer.js @@ -107,6 +107,9 @@ var keywords = { getLineTokens = function(line, startState) { + // regexp must not have capturing parentheses + // regexps are ordered -> the first match is used + var rules = { start : [ @@ -117,20 +120,30 @@ getLineTokens = function(line, startState) { token: "comment", // multi line comment in one line regex: "\\/\\*.*?\\*\\/" - }, + }, { - token: "comment", // multi line comment in several lines + token: "comment", // multi line comment start regex: "\\/\\*.*$", next: "comment" }, { token: "string", // single line - regex: '["][^"]*["]' + regex: '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' }, + { + token: "string", // multi line string start + regex: '["].*\\\\$', + next: "qqstring" + }, { token: "string", // single line - regex: "['][^']*[']" + regex: "['](?:(?:\\\\.)|(?:[^'\\\\]))*?[']" }, + { + token: "string", // multi line string start + regex: "['].*\\\\$", + next: "qstring" + }, { token: "number", // hex regex: "0[xX][0-9a-fA-F]+\\b" @@ -173,7 +186,31 @@ getLineTokens = function(line, startState) token: "comment", // comment spanning whole line regex: ".+" } - ] + ], + "qqstring": + [ + { + token: "string", + regex: '(?:(?:\\\\.)|(?:[^"\\\\]))*?"', + next: "start" + }, + { + token: "string", + regex: '.+' + } + ], + "qstring": + [ + { + token: "string", + regex: "(?:(?:\\\\.)|(?:[^'\\\\]))*?'", + next: "start" + }, + { + token: "string", + regex: '.+' + } + ] }; var regExps = {}; @@ -197,6 +234,8 @@ getLineTokens = function(line, startState) var match, tokens = []; + var lastIndex = 0; + while (match = re.exec(line)) { var token = { @@ -204,6 +243,11 @@ getLineTokens = function(line, startState) value: match[0] } + if (re.lastIndex == lastIndex) { + throw new Error("tokenizer error") + } + lastIndex = re.lastIndex; + //console.log(match); for (var i=0; i < state.length; i++)