diff --git a/lib/ace/mode/html.js b/lib/ace/mode/html.js
index cd6807c1..6014f02b 100644
--- a/lib/ace/mode/html.js
+++ b/lib/ace/mode/html.js
@@ -40,8 +40,10 @@ var HtmlHighlightRules = require("./html_highlight_rules").HtmlHighlightRules;
var HtmlBehaviour = require("./behaviour/html").HtmlBehaviour;
var HtmlFoldMode = require("./folding/html").FoldMode;
var HtmlCompletions = require("./html_completions").HtmlCompletions;
+var WorkerClient = require("../worker/worker_client").WorkerClient;
-var Mode = function() {
+var Mode = function(options) {
+ this.fragmentContext = options && options.fragmentContext;
this.HighlightRules = HtmlHighlightRules;
this.$behaviour = new HtmlBehaviour();
this.$completer = new HtmlCompletions();
@@ -71,6 +73,24 @@ oop.inherits(Mode, TextMode);
return this.$completer.getCompletions(state, session, pos, prefix);
};
+    /**
+     * Starts the "ace/mode/html_worker" worker for a session and mirrors the
+     * worker's reports into the session's annotations.
+     *
+     * @param {Object} session Must provide getDocument(), setAnnotations() and clearAnnotations().
+     * @return {WorkerClient} The worker client attached to the session's document.
+     */
+    this.createWorker = function(session) {
+        var client = new WorkerClient(["ace"], "ace/mode/html_worker", "Worker");
+        client.attachToDocument(session.getDocument());
+
+        // Only forwarded when the mode was constructed with a fragment context.
+        if (this.fragmentContext) {
+            client.call("setOptions", [{context: this.fragmentContext}]);
+        }
+
+        function showAnnotations(e) {
+            session.setAnnotations(e.data);
+        }
+
+        function dropAnnotations() {
+            session.clearAnnotations();
+        }
+
+        client.on("error", showAnnotations);
+        client.on("terminate", dropAnnotations);
+
+        return client;
+    };
+
this.$id = "ace/mode/html";
}).call(Mode.prototype);
diff --git a/lib/ace/mode/html/saxparser.js b/lib/ace/mode/html/saxparser.js
new file mode 100644
index 00000000..a15663f8
--- /dev/null
+++ b/lib/ace/mode/html/saxparser.js
@@ -0,0 +1,11113 @@
+define(function(require, exports, module){
+require=(function(e,t,n){function i(n,s){if(!t[n]){if(!e[n]){var o=typeof require=="function"&&require;if(!s&&o)return o(n,!0);if(r)return r(n,!0);throw new Error("Cannot find module '"+n+"'")}var u=t[n]={exports:{}};e[n][0].call(u.exports,function(t){var r=e[n][1][t];return i(r?r:t)},u,u.exports)}return t[n].exports}var r=typeof require=="function"&&require;for(var s=0;s= 0; i--) {
+ var node = this.elements[i];
+ if (node.localName === localName)
+ return true;
+ if (isMarker(node))
+ return false;
+ }
+};
+
+/**
+ * Appends an item to the end of the element list (the top of the stack).
+ * @param {StackItem} item
+ */
+ElementStack.prototype.push = function(item) {
+    var openElements = this.elements;
+    openElements.push(item);
+};
+
+/**
+ * Records the item's node as the stack's root node, then pushes the item.
+ * @param {StackItem} item HTML element stack item
+ */
+ElementStack.prototype.pushHtmlElement = function(item) {
+    var htmlNode = item.node;
+    this.rootNode = htmlNode;
+    this.push(item);
+};
+
+/**
+ * Records the item's node as the head element, then pushes the item.
+ * @param {StackItem} item HEAD element stack item
+ */
+ElementStack.prototype.pushHeadElement = function(item) {
+    var headNode = item.node;
+    this.headElement = headNode;
+    this.push(item);
+};
+
+/**
+ * Records the item's node as the body element, then pushes the item.
+ * @param {StackItem} item BODY element stack item
+ */
+ElementStack.prototype.pushBodyElement = function(item) {
+    var bodyNode = item.node;
+    this.bodyElement = bodyNode;
+    this.push(item);
+};
+
+/**
+ * Removes the topmost item and hands it back.
+ * @return {StackItem} The removed item (undefined when the stack is empty).
+ */
+ElementStack.prototype.pop = function() {
+    var popped = this.elements.pop();
+    return popped;
+};
+
+/**
+ * Removes the item from the element stack.
+ * Leaves the stack untouched when the item is not on it.
+ * @param {StackItem} item The item to remove
+ */
+ElementStack.prototype.remove = function(item) {
+    var index = this.elements.indexOf(item);
+    // indexOf reports -1 for an absent item; splice(-1, 1) would then
+    // silently drop the top of the stack instead of doing nothing.
+    if (index === -1)
+        return;
+    this.elements.splice(index, 1);
+};
+
+/**
+ * Pops items until one whose localName equals the given name has been popped.
+ * @param {string} localName
+ */
+ElementStack.prototype.popUntilPopped = function(localName) {
+    while (true) {
+        var popped = this.pop();
+        if (popped.localName == localName)
+            return;
+    }
+};
+
+/**
+ * Pops items until the top of the stack satisfies isTableScopeMarker.
+ */
+ElementStack.prototype.popUntilTableScopeMarker = function() {
+    for (;;) {
+        if (isTableScopeMarker(this.top))
+            break;
+        this.pop();
+    }
+};
+
+/**
+ * Pops items until the top of the stack satisfies isTableBodyScopeMarker.
+ */
+ElementStack.prototype.popUntilTableBodyScopeMarker = function() {
+    for (;;) {
+        if (isTableBodyScopeMarker(this.top))
+            break;
+        this.pop();
+    }
+};
+
+/**
+ * Pops items until the top of the stack satisfies isTableRowScopeMarker.
+ */
+ElementStack.prototype.popUntilTableRowScopeMarker = function() {
+    for (;;) {
+        if (isTableRowScopeMarker(this.top))
+            break;
+        this.pop();
+    }
+};
+
+/**
+ * Returns the item stored at the given position (0 is the bottom of the stack).
+ * @param {number} index
+ * @return {StackItem}
+ */
+ElementStack.prototype.item = function(index) {
+    var openElements = this.elements;
+    return openElements[index];
+};
+
+/**
+ * Tells whether the given item is currently on the stack.
+ * @param {StackItem} element
+ * @return {boolean}
+ */
+ElementStack.prototype.contains = function(element) {
+    var position = this.elements.indexOf(element);
+    return position >= 0;
+};
+
+/**
+ * Scope test delegated to _inScope with isScopeMarker as the boundary predicate.
+ * @param {string} localName
+ */
+ElementStack.prototype.inScope = function(localName) {
+    var found = this._inScope(localName, isScopeMarker);
+    return found;
+};
+
+/**
+ * Scope test delegated to _inScope with isListItemScopeMarker as the boundary predicate.
+ * @param {string} localName
+ */
+ElementStack.prototype.inListItemScope = function(localName) {
+    var found = this._inScope(localName, isListItemScopeMarker);
+    return found;
+};
+
+/**
+ * Scope test delegated to _inScope with isTableScopeMarker as the boundary predicate.
+ * @param {string} localName
+ */
+ElementStack.prototype.inTableScope = function(localName) {
+    var found = this._inScope(localName, isTableScopeMarker);
+    return found;
+};
+
+/**
+ * Scope test delegated to _inScope with isButtonScopeMarker as the boundary predicate.
+ * @param {string} localName
+ */
+ElementStack.prototype.inButtonScope = function(localName) {
+    var found = this._inScope(localName, isButtonScopeMarker);
+    return found;
+};
+
+/**
+ * Scope test delegated to _inScope with isSelectScopeMarker as the boundary predicate.
+ * @param {string} localName
+ */
+ElementStack.prototype.inSelectScope = function(localName) {
+    var found = this._inScope(localName, isSelectScopeMarker);
+    return found;
+};
+
+/**
+ * Walks the stack from the top down looking for a numbered header item.
+ * @return {boolean|undefined} true when a numbered header is met first, false
+ *     when a scope marker is met first, undefined when neither is on the stack.
+ */
+ElementStack.prototype.hasNumberedHeaderElementInScope = function() {
+    var index = this.elements.length;
+    while (index-- > 0) {
+        var entry = this.elements[index];
+        if (entry.isNumberedHeader())
+            return true;
+        if (isScopeMarker(entry))
+            return false;
+    }
+};
+
+/**
+ * Scans from the top of the stack down to the item whose node is `element`
+ * and reports the last special item seen on the way, i.e. the special item
+ * sitting closest above the formatting element.
+ * @param {Object} element Node of the formatting element.
+ * @return {StackItem|null|undefined} The special item, null when none lies
+ *     above the element, undefined when the element is not on the stack.
+ */
+ElementStack.prototype.furthestBlockForFormattingElement = function(element) {
+    var candidate = null;
+    var index = this.elements.length - 1;
+    while (index >= 0) {
+        var entry = this.elements[index];
+        if (entry.node === element)
+            return candidate;
+        if (entry.isSpecial())
+            candidate = entry;
+        index -= 1;
+    }
+};
+
+/**
+ * Finds the topmost item with the given local name.
+ * @param {string} localName
+ * @return {number|undefined} Its position, or undefined when there is none.
+ */
+ElementStack.prototype.findIndex = function(localName) {
+    var index = this.elements.length;
+    while (index-- > 0) {
+        var entry = this.elements[index];
+        if (entry.localName == localName)
+            return index;
+    }
+};
+
+/**
+ * Pops items one by one, passing each to the callback, and stops as soon as
+ * the callback answers with a truthy value. At least one item is popped.
+ * @param {function(StackItem): *} callback
+ * @return {StackItem} The item for which the callback answered truthy.
+ */
+ElementStack.prototype.remove_openElements_until = function(callback) {
+    var removed;
+    do {
+        removed = this.elements.pop();
+    } while (!callback(removed));
+    return removed;
+};
+
+// Read-only accessors: `top` is the last item of the element list (undefined
+// when it is empty) and `length` is the number of items on the stack.
+Object.defineProperties(ElementStack.prototype, {
+    top: {
+        get: function() {
+            var openElements = this.elements;
+            return openElements[openElements.length - 1];
+        }
+    },
+    length: {
+        get: function() {
+            var openElements = this.elements;
+            return openElements.length;
+        }
+    }
+});
+
+exports.ElementStack = ElementStack;
+
+},{}],2:[function(require,module,exports){
+var entities = require('html5-entities');
+var InputStream = require('./InputStream').InputStream;
+
+/**
+ * Magic value for UTF-16 operations.
+ * Adding (codePoint >> 10) to it yields the lead surrogate of a code point
+ * above 0xFFFF; the value works out to 0xD800 - 0x40 = 0xD7C0.
+ */
+var LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
+
+// Set of every non-empty leading substring of every entity name, so a scan
+// can stop as soon as the text read so far cannot grow into a known name.
+var namedEntityPrefixes = {};
+Object.keys(entities).forEach(function (name) {
+    var end = 0;
+    while (end < name.length) {
+        end += 1;
+        namedEntityPrefixes[name.slice(0, end)] = true;
+    }
+});
+
+// True for ASCII digits and ASCII letters of either case.
+function isAlphaNumeric(c) {
+    if (c >= '0' && c <= '9')
+        return true;
+    if (c >= 'a' && c <= 'z')
+        return true;
+    return c >= 'A' && c <= 'Z';
+}
+
+// True for 0-9, a-f and A-F.
+function isHexDigit(c) {
+    if (c >= '0' && c <= '9')
+        return true;
+    if (c >= 'a' && c <= 'f')
+        return true;
+    return c >= 'A' && c <= 'F';
+}
+
+// True for 0-9.
+function isDecimalDigit(c) {
+    var notBelowZero = c >= '0';
+    var notAboveNine = c <= '9';
+    return notBelowZero && notAboveNine;
+}
+
+var EntityParser = {};
+
+/**
+ * Tries to read a character reference; call it right after a '&' was read.
+ * When no reference is recognised every character read here is given back
+ * to the buffer, so the caller can emit a literal '&' and carry on.
+ *
+ * @param {InputStream} buffer Input positioned just behind the '&'.
+ * @param {Tokenizer} tokenizer Receives parse errors through _parseError().
+ * @param {string} [additionalAllowedCharacter] A character that, directly
+ *     after the '&', means "not a reference". When set, a named reference
+ *     without ';' that is followed by '=' or an alphanumeric is left alone.
+ * @return {string|boolean} The decoded text, or false when nothing was consumed.
+ */
+EntityParser.consumeEntity = function(buffer, tokenizer, additionalAllowedCharacter) {
+    var decodedCharacter = '';
+    var consumedCharacters = '';
+    var ch = buffer.char();
+    if (ch === InputStream.EOF)
+        return false;
+    consumedCharacters += ch;
+    // Tab, line feed, form feed, space, '<' and '&' never start a reference.
+    if (ch == '\t' || ch == '\n' || ch == '\f' || ch == ' ' || ch == '<' || ch == '&') {
+        buffer.unget(consumedCharacters);
+        return false;
+    }
+    if (additionalAllowedCharacter === ch) {
+        buffer.unget(consumedCharacters);
+        return false;
+    }
+    if (ch == '#') {
+        ch = buffer.shift(1);
+        if (ch === InputStream.EOF) {
+            tokenizer._parseError("expected-numeric-entity-but-got-eof");
+            buffer.unget(consumedCharacters);
+            return false;
+        }
+        consumedCharacters += ch;
+        var radix = 10;
+        var isDigit = isDecimalDigit;
+        if (ch == 'x' || ch == 'X') {
+            radix = 16;
+            isDigit = isHexDigit;
+            ch = buffer.shift(1);
+            if (ch === InputStream.EOF) {
+                tokenizer._parseError("expected-numeric-entity-but-got-eof");
+                buffer.unget(consumedCharacters);
+                return false;
+            }
+            consumedCharacters += ch;
+        }
+        if (isDigit(ch)) {
+            var code = '';
+            while (ch !== InputStream.EOF && isDigit(ch)) {
+                code += ch;
+                ch = buffer.char();
+            }
+            code = parseInt(code, radix);
+            var replacement = this.replaceEntityNumbers(code);
+            if (replacement) {
+                tokenizer._parseError("invalid-numeric-entity-replaced");
+                code = replacement;
+            }
+            // Code points above 0xFFFF need a surrogate pair; 0x10FFFF is
+            // included so it can never be truncated by fromCharCode.
+            if (code > 0xFFFF && code <= 0x10FFFF) {
+                var astralChar = "";
+                astralChar += String.fromCharCode(LEAD_OFFSET + (code >> 10));
+                astralChar += String.fromCharCode(0xDC00 + (code & 0x3FF));
+                decodedCharacter = astralChar;
+            } else
+                decodedCharacter = String.fromCharCode(code);
+            if (ch !== ';') {
+                tokenizer._parseError("numeric-entity-without-semicolon");
+                // The character that ended the digits belongs to the following text.
+                buffer.unget(ch);
+            }
+            return decodedCharacter;
+        }
+        buffer.unget(consumedCharacters);
+        tokenizer._parseError("expected-numeric-entity");
+        return false;
+    }
+    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
+        var mostRecentMatch = '';
+        // Keep reading while the text so far can still grow into an entity
+        // name, remembering the longest complete name seen.
+        while (namedEntityPrefixes[consumedCharacters]) {
+            if (entities[consumedCharacters]) {
+                mostRecentMatch = consumedCharacters;
+            }
+            if (ch == ';')
+                break;
+            ch = buffer.char();
+            if (ch === InputStream.EOF)
+                break;
+            consumedCharacters += ch;
+        }
+        if (!mostRecentMatch) {
+            tokenizer._parseError("expected-named-entity");
+            buffer.unget(consumedCharacters);
+            return false;
+        }
+        decodedCharacter = entities[mostRecentMatch];
+        if (ch === ';' || !additionalAllowedCharacter || !(isAlphaNumeric(ch) || ch === '=')) {
+            // Give back whatever was read beyond the matched name.
+            if (consumedCharacters.length > mostRecentMatch.length) {
+                buffer.unget(consumedCharacters.substring(mostRecentMatch.length));
+            }
+            if (ch !== ';') {
+                tokenizer._parseError("named-entity-without-semicolon");
+            }
+            return decodedCharacter;
+        }
+        buffer.unget(consumedCharacters);
+        return false;
+    }
+    // Anything else (a digit, punctuation, ...) cannot start a reference.
+    // Give the character back so that it is not dropped from the input.
+    buffer.unget(consumedCharacters);
+    return false;
+};
+
+/**
+ * Maps the number of a numeric character reference to the code point that
+ * must be used in its place.
+ *
+ * Any number returned is treated by consumeEntity as a parse error, so
+ * returning the input unchanged means "keep it, but report it".
+ *
+ * @param {number} c Parsed reference number.
+ * @return {number|undefined} The code point to use, or undefined when the
+ *     number is acceptable as it stands.
+ */
+EntityParser.replaceEntityNumbers = function(c) {
+    switch(c) {
+        case 0x00: return 0xFFFD; // REPLACEMENT CHARACTER
+        case 0x0D: return 0x000D; // CARRIAGE RETURN
+        case 0x80: return 0x20AC; // EURO SIGN
+        case 0x81: return 0x0081; // <control>
+        case 0x82: return 0x201A; // SINGLE LOW-9 QUOTATION MARK
+        case 0x83: return 0x0192; // LATIN SMALL LETTER F WITH HOOK
+        case 0x84: return 0x201E; // DOUBLE LOW-9 QUOTATION MARK
+        case 0x85: return 0x2026; // HORIZONTAL ELLIPSIS
+        case 0x86: return 0x2020; // DAGGER
+        case 0x87: return 0x2021; // DOUBLE DAGGER
+        case 0x88: return 0x02C6; // MODIFIER LETTER CIRCUMFLEX ACCENT
+        case 0x89: return 0x2030; // PER MILLE SIGN
+        case 0x8A: return 0x0160; // LATIN CAPITAL LETTER S WITH CARON
+        case 0x8B: return 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+        case 0x8C: return 0x0152; // LATIN CAPITAL LIGATURE OE
+        case 0x8D: return 0x008D; // <control>
+        case 0x8E: return 0x017D; // LATIN CAPITAL LETTER Z WITH CARON
+        case 0x8F: return 0x008F; // <control>
+        case 0x90: return 0x0090; // <control>
+        case 0x91: return 0x2018; // LEFT SINGLE QUOTATION MARK
+        case 0x92: return 0x2019; // RIGHT SINGLE QUOTATION MARK
+        case 0x93: return 0x201C; // LEFT DOUBLE QUOTATION MARK
+        case 0x94: return 0x201D; // RIGHT DOUBLE QUOTATION MARK
+        case 0x95: return 0x2022; // BULLET
+        case 0x96: return 0x2013; // EN DASH
+        case 0x97: return 0x2014; // EM DASH
+        case 0x98: return 0x02DC; // SMALL TILDE
+        case 0x99: return 0x2122; // TRADE MARK SIGN
+        case 0x9A: return 0x0161; // LATIN SMALL LETTER S WITH CARON
+        case 0x9B: return 0x203A; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+        case 0x9C: return 0x0153; // LATIN SMALL LIGATURE OE
+        case 0x9D: return 0x009D; // <control>
+        case 0x9E: return 0x017E; // LATIN SMALL LETTER Z WITH CARON
+        case 0x9F: return 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
+        default:
+            /// @todo. The spec says > 0x10FFFF, not >=. Section 8.2.4.69.
+            // Kept as >= on purpose: consumeEntity only builds a surrogate
+            // pair for code points strictly below 0x10FFFF.
+            if ((c >= 0xD800 && c <= 0xDFFF) || c >= 0x10FFFF) {
+                return 0xFFFD;
+            } else if ((c >= 0x0001 && c <= 0x0008) || (c >= 0x000E && c <= 0x001F) ||
+                (c >= 0x007F && c <= 0x009F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
+                c == 0x000B ||
+                // Noncharacters U+xFFFE and U+xFFFF of every plane.
+                (c & 0xFFFE) == 0xFFFE) {
+                return c;
+            }
+    }
+};
+
+exports.EntityParser = EntityParser;
+
+},{"./InputStream":3,"html5-entities":12}],3:[function(require,module,exports){
+// FIXME convert CR to LF http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#input-stream
+/**
+ * Growable text buffer with a read cursor (`start`) and a commit mark
+ * (`committed`). While `eof` is false, reads that would reach the end of the
+ * buffered text throw InputStream.DRAIN; once `eof` is true they report
+ * InputStream.EOF instead.
+ */
+function InputStream() {
+    this.data = '';
+    this.start = 0;
+    this.committed = 0;
+    this.eof = false;
+    this.lastLocation = {line: 0, column: 0};
+}
+
+// Sentinel returned when the input is exhausted.
+InputStream.EOF = -1;
+
+// Sentinel thrown when more input is needed before reading can continue.
+InputStream.DRAIN = -2;
+
+InputStream.prototype = {
+    // Everything from the cursor to the end of the buffered text.
+    slice: function() {
+        if (this.start < this.data.length)
+            return this.data.slice(this.start, this.data.length);
+        if (this.eof)
+            return InputStream.EOF;
+        throw InputStream.DRAIN;
+    },
+    // Reads one character and moves the cursor past it.
+    char: function() {
+        var remaining = this.data.length - this.start;
+        if (!this.eof && remaining <= 1)
+            throw InputStream.DRAIN;
+        if (remaining <= 0)
+            return InputStream.EOF;
+        return this.data[this.start++];
+    },
+    // Moves the cursor forward; drops committed text once it makes up more
+    // than half of the buffer.
+    advance: function(amount) {
+        this.start += amount;
+        if (this.start >= this.data.length) {
+            if (this.eof)
+                return InputStream.EOF;
+            throw InputStream.DRAIN;
+        }
+        if (this.committed <= this.data.length / 2)
+            return;
+        // Sliiiide
+        this.lastLocation = this.location();
+        this.data = this.data.slice(this.committed);
+        this.start -= this.committed;
+        this.committed = 0;
+    },
+    // Consumes the longest run at the cursor matching `re` (a regexp source).
+    matchWhile: function(re) {
+        if (this.eof && this.start >= this.data.length)
+            return '';
+        var pattern = new RegExp("^" + re + "+");
+        var found = pattern.exec(this.slice());
+        if (!found)
+            return '';
+        var run = found[0];
+        // A run touching the end of the buffer may continue in later input.
+        if (!this.eof && run.length == this.data.length - this.start)
+            throw InputStream.DRAIN;
+        this.advance(run.length);
+        return run;
+    },
+    // Consumes everything before the first match of `re` (a regexp source);
+    // at end of input the end of the text counts as a match.
+    matchUntil: function(re) {
+        var rest = this.slice();
+        if (rest === InputStream.EOF)
+            return '';
+        var source = re + (this.eof ? "|$" : "");
+        var found = new RegExp(source).exec(rest);
+        if (!found)
+            throw InputStream.DRAIN;
+        var text = this.data.slice(this.start, this.start + found.index);
+        this.advance(found.index);
+        return text.toString();
+    },
+    // Adds more text to the end of the buffer.
+    append: function(data) {
+        this.data = this.data + data;
+    },
+    // Reads up to n characters and moves the cursor past them.
+    shift: function(n) {
+        if (this.eof) {
+            if (this.start >= this.data.length)
+                return InputStream.EOF;
+        } else if (this.start + n >= this.data.length) {
+            throw InputStream.DRAIN;
+        }
+        var taken = this.data.slice(this.start, this.start + n).toString();
+        this.advance(Math.min(n, this.data.length - this.start));
+        return taken;
+    },
+    // Like shift, but leaves the cursor where it is.
+    peek: function(n) {
+        if (this.eof) {
+            if (this.start >= this.data.length)
+                return InputStream.EOF;
+        } else if (this.start + n >= this.data.length) {
+            throw InputStream.DRAIN;
+        }
+        var end = Math.min(this.start + n, this.data.length);
+        return this.data.slice(this.start, end).toString();
+    },
+    length: function() {
+        return this.data.length - (this.start + 1);
+    },
+    // Moves the cursor back over the given text; EOF is ignored.
+    unget: function(d) {
+        if (d !== InputStream.EOF)
+            this.start -= (d.length);
+    },
+    // Rewinds the cursor to the commit mark.
+    undo: function() {
+        this.start = this.committed;
+    },
+    // Moves the commit mark up to the cursor.
+    commit: function() {
+        this.committed = this.start;
+    },
+    // Line/column of the commit mark, counted from the last recorded location.
+    location: function() {
+        var base = this.lastLocation;
+        var consumed = this.data.slice(0, this.committed);
+        var breaks = consumed.match(/\n/g);
+        if (!breaks)
+            return {line: base.line, column: base.column + consumed.length};
+        return {
+            line: base.line + breaks.length,
+            column: consumed.length - consumed.lastIndexOf('\n') - 1
+        };
+    }
+};
+
+exports.InputStream = InputStream;
+
+},{}],4:[function(require,module,exports){
+// Local names of the elements in the "special" category, keyed by namespace URI.
+var SpecialElements = {
+    "http://www.w3.org/1999/xhtml": [
+        'address', 'applet', 'area', 'article', 'aside', 'base', 'basefont',
+        'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center',
+        'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed',
+        'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame',
+        'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header',
+        'hgroup', 'hr', 'html', 'iframe', 'img', 'input', 'isindex', 'li',
+        'link', 'listing', 'main', 'marquee', 'menu', 'menuitem', 'meta',
+        'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p',
+        'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source',
+        'style', 'summary', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
+        'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp'
+    ],
+    "http://www.w3.org/1998/Math/MathML": [
+        'mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'
+    ],
+    "http://www.w3.org/2000/svg": [
+        'foreignObject', 'desc', 'title'
+    ]
+};
+
+
+/**
+ * One entry of the stack of open elements.
+ * @param {string} namespaceURI
+ * @param {string} localName
+ * @param {Array} attributes Objects carrying nodeName / nodeValue.
+ * @param {Object} node The node this entry stands for.
+ */
+function StackItem(namespaceURI, localName, attributes, node) {
+    this.localName = localName;
+    this.namespaceURI = namespaceURI;
+    this.attributes = attributes;
+    this.node = node;
+}
+
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
+StackItem.prototype.isSpecial = function() {
+    if (!(this.namespaceURI in SpecialElements))
+        return false;
+    var names = SpecialElements[this.namespaceURI];
+    return names.indexOf(this.localName) > -1;
+};
+
+// True for the HTML table elements named in the list below.
+StackItem.prototype.isFosterParenting = function() {
+    if (this.namespaceURI !== "http://www.w3.org/1999/xhtml")
+        return false;
+    var tableParts = ['table', 'tbody', 'tfoot', 'thead', 'tr'];
+    return tableParts.indexOf(this.localName) !== -1;
+};
+
+// True for HTML h1 ... h6.
+StackItem.prototype.isNumberedHeader = function() {
+    if (this.namespaceURI !== "http://www.w3.org/1999/xhtml")
+        return false;
+    var headers = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
+    return headers.indexOf(this.localName) !== -1;
+};
+
+// True for every item outside the HTML namespace.
+StackItem.prototype.isForeign = function() {
+    return !(this.namespaceURI == "http://www.w3.org/1999/xhtml");
+};
+
+// Value of the first attribute of `item` called `name`, or null.
+function getAttribute(item, name) {
+    var attrs = item.attributes;
+    var index = 0;
+    while (index < attrs.length) {
+        if (attrs[index].nodeName == name)
+            return attrs[index].nodeValue;
+        index += 1;
+    }
+    return null;
+}
+
+// MathML annotation-xml with an HTML/XHTML encoding, or SVG
+// foreignObject / desc / title.
+StackItem.prototype.isHtmlIntegrationPoint = function() {
+    switch (this.namespaceURI) {
+        case "http://www.w3.org/1998/Math/MathML":
+            if (this.localName !== "annotation-xml")
+                return false;
+            var encoding = getAttribute(this, 'encoding');
+            if (!encoding)
+                return false;
+            var normalized = encoding.toLowerCase();
+            return normalized === "text/html" || normalized === "application/xhtml+xml";
+        case "http://www.w3.org/2000/svg":
+            return ['foreignObject', 'desc', 'title'].indexOf(this.localName) !== -1;
+        default:
+            return false;
+    }
+};
+
+// MathML mi, mo, mn, ms and mtext.
+StackItem.prototype.isMathMLTextIntegrationPoint = function() {
+    if (this.namespaceURI !== "http://www.w3.org/1998/Math/MathML")
+        return false;
+    return ['mi', 'mo', 'mn', 'ms', 'mtext'].indexOf(this.localName) !== -1;
+};
+
+exports.StackItem = StackItem;
+
+},{}],5:[function(require,module,exports){
+var InputStream = require('./InputStream').InputStream;
+var EntityParser = require('./EntityParser').EntityParser;
+
+// True for space, line feed, tab, carriage return and form feed.
+function isWhitespace(c){
+    switch (c) {
+        case " ":
+        case "\n":
+        case "\t":
+        case "\r":
+        case "\f":
+            return true;
+        default:
+            return false;
+    }
+}
+
+// True for ASCII letters of either case.
+function isAlpha(c) {
+    if (c >= 'A' && c <= 'Z')
+        return true;
+    return c >= 'a' && c <= 'z';
+}
+
+/**
+ * Creates a tokenizer that reports to the given token handler.
+ *
+ * @param {Object} tokenHandler Receives tokens and parse errors.
+ * @constructor
+ */
+function Tokenizer(tokenHandler) {
+    // Collaborators.
+    this._tokenHandler = tokenHandler;
+    this._inputStream = new InputStream();
+
+    // State machine position and scratch data.
+    this._state = Tokenizer.DATA;
+    this._currentToken = null;
+    this._temporaryBuffer = '';
+    this._additionalAllowedCharacter = '';
+}
+
+// Passes a parse error code and its arguments on to the token handler.
+Tokenizer.prototype._parseError = function(code, args) {
+    var handler = this._tokenHandler;
+    handler.parseError(code, args);
+};
+
+/**
+ * Tidies a token, hands it to the token handler and reports tag-level
+ * parse errors.
+ *
+ * Start tags lose every attribute entry without a nodeName (scanned from the
+ * second entry on); end tags are reported when they carry a self-closing
+ * flag or attributes; a self-closing start tag is reported when the handler
+ * did not acknowledge the flag.
+ *
+ * @param {Object} token
+ */
+Tokenizer.prototype._emitToken = function(token) {
+    if (token.type === 'StartTag') {
+        var index = 1;
+        while (index < token.data.length) {
+            if (token.data[index].nodeName)
+                index += 1;
+            else
+                token.data.splice(index, 1);
+        }
+    } else if (token.type === 'EndTag') {
+        if (token.selfClosing)
+            this._parseError('self-closing-flag-on-end-tag');
+        if (token.data.length !== 0)
+            this._parseError('attributes-in-end-tag');
+    }
+
+    this._tokenHandler.processToken(token);
+
+    if (token.type !== 'StartTag' || !token.selfClosing)
+        return;
+    if (!this._tokenHandler.isSelfClosingFlagAcknowledged())
+        this._parseError('non-void-element-with-trailing-solidus', {name: token.name});
+};
+
+// Goes back to the data state, then emits the token under construction.
+Tokenizer.prototype._emitCurrentToken = function() {
+    this._state = Tokenizer.DATA;
+    var finished = this._currentToken;
+    this._emitToken(finished);
+};
+
+// The last attribute entry of the token under construction.
+Tokenizer.prototype._currentAttribute = function() {
+    var attributes = this._currentToken.data;
+    return attributes[attributes.length - 1];
+};
+
+/**
+ * Selects the state function that handles the next step.
+ * @param {Function} state
+ */
+Tokenizer.prototype.setState = function(state) {
+    var next = state;
+    this._state = next;
+};
+
+Tokenizer.prototype.tokenize = function(source) {
+ // FIXME proper tokenizer states
+ Tokenizer.DATA = data_state;
+ Tokenizer.RCDATA = rcdata_state;
+ Tokenizer.RAWTEXT = rawtext_state;
+ Tokenizer.SCRIPT_DATA = script_data_state;
+ Tokenizer.PLAINTEXT = plaintext_state;
+
+
+ this._state = Tokenizer.DATA;
+
+ this._inputStream.append(source);
+
+ this._tokenHandler.startTokenization(this);
+
+ this._inputStream.eof = true;
+
+ var tokenizer = this;
+
+ while (this._state.call(this, this._inputStream));
+
+
+    // Data state: plain text until '&', '<', NUL or the end of the input.
+    // Returns false once the EOF token has been emitted, true otherwise.
+    function data_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                tokenizer._emitToken({type: 'EOF', data: null});
+                return false;
+            case '&':
+                tokenizer.setState(character_reference_in_data_state);
+                break;
+            case '<':
+                tokenizer.setState(tag_open_state);
+                break;
+            case '\u0000':
+                tokenizer._emitToken({type: 'Characters', data: ch});
+                buffer.commit();
+                break;
+            default:
+                // Emit the whole run up to the next significant character at once.
+                var rest = buffer.matchUntil("&|<|\u0000");
+                tokenizer._emitToken({type: 'Characters', data: ch + rest});
+                buffer.commit();
+        }
+        return true;
+    }
+
+    // Resolves a character reference met in data; emits a literal '&' when
+    // the entity parser did not recognise one.
+    function character_reference_in_data_state(buffer) {
+        var decoded = EntityParser.consumeEntity(buffer, tokenizer);
+        tokenizer.setState(data_state);
+        var text = decoded || '&';
+        tokenizer._emitToken({type: 'Characters', data: text});
+        return true;
+    }
+
+    // RCDATA state: text in which only character references and '<' are
+    // significant; NUL is reported and replaced by U+FFFD.
+    function rcdata_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                tokenizer._emitToken({type: 'EOF', data: null});
+                return false;
+            case '&':
+                tokenizer.setState(character_reference_in_rcdata_state);
+                break;
+            case '<':
+                tokenizer.setState(rcdata_less_than_sign_state);
+                break;
+            case "\u0000":
+                tokenizer._parseError("invalid-codepoint");
+                tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+                buffer.commit();
+                break;
+            default:
+                var rest = buffer.matchUntil("&|<|\u0000");
+                tokenizer._emitToken({type: 'Characters', data: ch + rest});
+                buffer.commit();
+        }
+        return true;
+    }
+
+    // Resolves a character reference met in RCDATA; emits a literal '&' when
+    // the entity parser did not recognise one.
+    function character_reference_in_rcdata_state(buffer) {
+        var decoded = EntityParser.consumeEntity(buffer, tokenizer);
+        tokenizer.setState(rcdata_state);
+        var text = decoded || '&';
+        tokenizer._emitToken({type: 'Characters', data: text});
+        return true;
+    }
+
+    // RAWTEXT state: text in which only '<' is significant; NUL is reported
+    // and replaced by U+FFFD.
+    function rawtext_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                tokenizer._emitToken({type: 'EOF', data: null});
+                return false;
+            case '<':
+                tokenizer.setState(rawtext_less_than_sign_state);
+                break;
+            case "\u0000":
+                tokenizer._parseError("invalid-codepoint");
+                tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+                buffer.commit();
+                break;
+            default:
+                var rest = buffer.matchUntil("<|\u0000");
+                tokenizer._emitToken({type: 'Characters', data: ch + rest});
+        }
+        return true;
+    }
+
+    // PLAINTEXT state: everything up to the end of the input is text; NUL is
+    // reported and replaced by U+FFFD.
+    function plaintext_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                tokenizer._emitToken({type: 'EOF', data: null});
+                return false;
+            case "\u0000":
+                tokenizer._parseError("invalid-codepoint");
+                tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+                buffer.commit();
+                break;
+            default:
+                var rest = buffer.matchUntil("\u0000");
+                tokenizer._emitToken({type: 'Characters', data: ch + rest});
+        }
+        return true;
+    }
+
+
+    // Script data state: text in which only '<' is significant; NUL is
+    // reported and replaced by U+FFFD.
+    function script_data_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                tokenizer._emitToken({type: 'EOF', data: null});
+                return false;
+            case '<':
+                tokenizer.setState(script_data_less_than_sign_state);
+                break;
+            case '\u0000':
+                tokenizer._parseError("invalid-codepoint");
+                tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+                buffer.commit();
+                break;
+            default:
+                var rest = buffer.matchUntil("<|\u0000");
+                tokenizer._emitToken({type: 'Characters', data: ch + rest});
+        }
+        return true;
+    }
+
+    // After '<' in RCDATA: '/' may open an end tag, anything else makes the
+    // '<' ordinary text.
+    function rcdata_less_than_sign_state(buffer) {
+        var ch = buffer.char();
+        if (ch !== "/") {
+            tokenizer._emitToken({type: 'Characters', data: '<'});
+            buffer.unget(ch);
+            tokenizer.setState(rcdata_state);
+            return true;
+        }
+        this._temporaryBuffer = '';
+        tokenizer.setState(rcdata_end_tag_open_state);
+        return true;
+    }
+
+    // After '</' in RCDATA: a letter starts an end tag name; anything else
+    // means the two characters were ordinary text.
+    function rcdata_end_tag_open_state(buffer) {
+        var data = buffer.char();
+        if (isAlpha(data)) {
+            this._temporaryBuffer += data;
+            tokenizer.setState(rcdata_end_tag_name_state);
+        } else {
+            // The '<' and '/' read so far were never emitted; emit them now.
+            tokenizer._emitToken({type: 'Characters', data: '</'});
+            buffer.unget(data);
+            tokenizer.setState(rcdata_state);
+        }
+        return true;
+    }
+
+    // Reads the name of a possible RCDATA end tag. The tag only counts when
+    // its lower-cased name equals the name of the current token; otherwise
+    // everything read since '<' is emitted as text.
+    function rcdata_end_tag_name_state(buffer) {
+        // Lower-cased once: it is compared case-insensitively and is also the
+        // name the end tag must carry to equal the current token's name.
+        var tagName = this._temporaryBuffer.toLowerCase();
+        var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === tagName);
+        var data = buffer.char();
+        if (isWhitespace(data) && appropriate) {
+            tokenizer._currentToken = {type: 'EndTag', name: tagName, data: [], selfClosing: false};
+            tokenizer.setState(before_attribute_name_state);
+        } else if (data === '/' && appropriate) {
+            tokenizer._currentToken = {type: 'EndTag', name: tagName, data: [], selfClosing: false};
+            tokenizer.setState(self_closing_tag_state);
+        } else if (data === '>' && appropriate) {
+            tokenizer._currentToken = {type: 'EndTag', name: tagName, data: [], selfClosing: false};
+            tokenizer._emitCurrentToken();
+            tokenizer.setState(data_state);
+        } else if (isAlpha(data)) {
+            this._temporaryBuffer += data;
+            buffer.commit();
+        } else {
+            // Not an end tag after all: emit the '</' and the name as text.
+            tokenizer._emitToken({type: 'Characters', data: '</' + this._temporaryBuffer});
+            buffer.unget(data);
+            tokenizer.setState(rcdata_state);
+        }
+        return true;
+    }
+
+    // After '<' in RAWTEXT: '/' may open an end tag, anything else makes the
+    // '<' ordinary text.
+    function rawtext_less_than_sign_state(buffer) {
+        var ch = buffer.char();
+        if (ch !== "/") {
+            tokenizer._emitToken({type: 'Characters', data: '<'});
+            buffer.unget(ch);
+            tokenizer.setState(rawtext_state);
+            return true;
+        }
+        this._temporaryBuffer = '';
+        tokenizer.setState(rawtext_end_tag_open_state);
+        return true;
+    }
+
+    // After '</' in RAWTEXT: a letter starts an end tag name; anything else
+    // means the two characters were ordinary text.
+    function rawtext_end_tag_open_state(buffer) {
+        var data = buffer.char();
+        if (isAlpha(data)) {
+            this._temporaryBuffer += data;
+            tokenizer.setState(rawtext_end_tag_name_state);
+        } else {
+            // The '<' and '/' read so far were never emitted; emit them now.
+            tokenizer._emitToken({type: 'Characters', data: '</'});
+            buffer.unget(data);
+            tokenizer.setState(rawtext_state);
+        }
+        return true;
+    }
+
+    // Reads the name of a possible RAWTEXT end tag. The tag only counts when
+    // its lower-cased name equals the name of the current token; otherwise
+    // everything read since '<' is emitted as text.
+    function rawtext_end_tag_name_state(buffer) {
+        // Lower-cased once: it is compared case-insensitively and is also the
+        // name the end tag must carry to equal the current token's name.
+        var tagName = this._temporaryBuffer.toLowerCase();
+        var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === tagName);
+        var data = buffer.char();
+        if (isWhitespace(data) && appropriate) {
+            tokenizer._currentToken = {type: 'EndTag', name: tagName, data: [], selfClosing: false};
+            tokenizer.setState(before_attribute_name_state);
+        } else if (data === '/' && appropriate) {
+            tokenizer._currentToken = {type: 'EndTag', name: tagName, data: [], selfClosing: false};
+            tokenizer.setState(self_closing_tag_state);
+        } else if (data === '>' && appropriate) {
+            tokenizer._currentToken = {type: 'EndTag', name: tagName, data: [], selfClosing: false};
+            tokenizer._emitCurrentToken();
+            tokenizer.setState(data_state);
+        } else if (isAlpha(data)) {
+            this._temporaryBuffer += data;
+            buffer.commit();
+        } else {
+            // Not an end tag after all: emit the '</' and the name as text.
+            tokenizer._emitToken({type: 'Characters', data: '</' + this._temporaryBuffer});
+            buffer.unget(data);
+            tokenizer.setState(rawtext_state);
+        }
+        return true;
+    }
+
+ function script_data_less_than_sign_state(buffer) {
+ var data = buffer.char();
+ if (data === "/") {
+ this._temporaryBuffer = '';
+ tokenizer.setState(script_data_end_tag_open_state);
+ } else if (data === '!') {
+ tokenizer._emitToken({type: 'Characters', data: '' && appropriate) {
+ tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false};
+ tokenizer._emitCurrentToken();
+ } else if (isAlpha(data)) {
+ this._temporaryBuffer += data;
+ buffer.commit();
+ } else {
+ tokenizer._emitToken({type: 'Characters', data: '' + this._temporaryBuffer});
+ buffer.unget(data);
+ tokenizer.setState(script_data_state);
+ }
+ return true;
+ }
+
+    // Script data escape start: a '-' is emitted and leads on to the
+    // start-dash state; anything else goes back to script data.
+    function script_data_escape_start_state(buffer) {
+        var ch = buffer.char();
+        if (ch !== '-') {
+            buffer.unget(ch);
+            tokenizer.setState(script_data_state);
+            return true;
+        }
+        tokenizer._emitToken({type: 'Characters', data: '-'});
+        tokenizer.setState(script_data_escape_start_dash_state);
+        return true;
+    }
+
+    // Script data escape start dash: a second '-' is emitted and enters the
+    // escaped dash-dash state; anything else goes back to script data.
+    function script_data_escape_start_dash_state(buffer) {
+        var ch = buffer.char();
+        if (ch !== '-') {
+            buffer.unget(ch);
+            tokenizer.setState(script_data_state);
+            return true;
+        }
+        tokenizer._emitToken({type: 'Characters', data: '-'});
+        tokenizer.setState(script_data_escaped_dash_dash_state);
+        return true;
+    }
+
+    // Script data escaped state: text until '-', '<', NUL or the end of the
+    // input; the end of the input is handed to the data state.
+    function script_data_escaped_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                buffer.unget(ch);
+                tokenizer.setState(data_state);
+                break;
+            case '-':
+                tokenizer._emitToken({type: 'Characters', data: '-'});
+                tokenizer.setState(script_data_escaped_dash_state);
+                break;
+            case '<':
+                tokenizer.setState(script_data_escaped_less_then_sign_state);
+                break;
+            case '\u0000':
+                tokenizer._parseError("invalid-codepoint");
+                tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+                buffer.commit();
+                break;
+            default:
+                var rest = buffer.matchUntil('<|-|\u0000');
+                tokenizer._emitToken({type: 'Characters', data: ch + rest});
+        }
+        return true;
+    }
+
+    // Script data escaped dash state: one '-' has been seen inside escaped
+    // script data.
+    function script_data_escaped_dash_state(buffer) {
+        var ch = buffer.char();
+        switch (ch) {
+            case InputStream.EOF:
+                buffer.unget(ch);
+                tokenizer.setState(data_state);
+                break;
+            case '-':
+                tokenizer._emitToken({type: 'Characters', data: '-'});
+                tokenizer.setState(script_data_escaped_dash_dash_state);
+                break;
+            case '<':
+                tokenizer.setState(script_data_escaped_less_then_sign_state);
+                break;
+            case '\u0000':
+                tokenizer._parseError("invalid-codepoint");
+                tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+                tokenizer.setState(script_data_escaped_state);
+                break;
+            default:
+                tokenizer._emitToken({type: 'Characters', data: ch});
+                tokenizer.setState(script_data_escaped_state);
+        }
+        return true;
+    }
+
+    // Script data escaped dash dash state: "--" has been seen inside escaped
+    // script data, so a following '>' ends the escape.
+    function script_data_escaped_dash_dash_state(buffer) {
+        var data = buffer.char();
+        if (data === InputStream.EOF) {
+            tokenizer._parseError('eof-in-script');
+            buffer.unget(data);
+            tokenizer.setState(data_state);
+        } else if (data === '-') {
+            // Further dashes keep this state, so that "--->" still ends the
+            // escape exactly like "-->" does.
+            tokenizer._emitToken({type: 'Characters', data: '-'});
+        } else if (data === '<') {
+            tokenizer.setState(script_data_escaped_less_then_sign_state);
+        } else if (data === '>') {
+            tokenizer._emitToken({type: 'Characters', data: '>'});
+            tokenizer.setState(script_data_state);
+        } else if (data === '\u0000') {
+            tokenizer._parseError("invalid-codepoint");
+            tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+            tokenizer.setState(script_data_escaped_state);
+        } else {
+            tokenizer._emitToken({type: 'Characters', data: data});
+            tokenizer.setState(script_data_escaped_state);
+        }
+        return true;
+    }
+
+    // After '<' in escaped script data: '/' may open an end tag, a letter
+    // may start a nested script tag name, anything else makes the '<'
+    // ordinary text.
+    function script_data_escaped_less_then_sign_state(buffer) {
+        var ch = buffer.char();
+        if (ch === '/') {
+            this._temporaryBuffer = '';
+            tokenizer.setState(script_data_escaped_end_tag_open_state);
+            return true;
+        }
+        if (isAlpha(ch)) {
+            tokenizer._emitToken({type: 'Characters', data: '<' + ch});
+            this._temporaryBuffer = ch;
+            tokenizer.setState(script_data_double_escape_start_state);
+            return true;
+        }
+        tokenizer._emitToken({type: 'Characters', data: '<'});
+        buffer.unget(ch);
+        tokenizer.setState(script_data_escaped_state);
+        return true;
+    }
+
+ function script_data_escaped_end_tag_open_state(buffer) {
+ var data = buffer.char();
+ if (isAlpha(data)) {
+ this._temporaryBuffer = data;
+ tokenizer.setState(script_data_escaped_end_tag_name_state);
+ } else {
+ tokenizer._emitToken({type: 'Characters', data: ''});
+ buffer.unget(data);
+ tokenizer.setState(script_data_escaped_state);
+ }
+ return true;
+ }
+
+ function script_data_escaped_end_tag_name_state(buffer) {
+ var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === this._temporaryBuffer.toLowerCase());
+ var data = buffer.char();
+ if (isWhitespace(data) && appropriate) {
+ tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false};
+ tokenizer.setState(before_attribute_name_state);
+ } else if (data === '/' && appropriate) {
+ tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false};
+ tokenizer.setState(self_closing_tag_state);
+ } else if (data === '>' && appropriate) {
+ tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false};
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isAlpha(data)) {
+ this._temporaryBuffer += data;
+ buffer.commit();
+ } else {
+ tokenizer._emitToken({type: 'Characters', data: '' + this._temporaryBuffer});
+ buffer.unget(data);
+ tokenizer.setState(script_data_escaped_state);
+ }
+ return true;
+ }
+
+ function script_data_double_escape_start_state(buffer) {
+ var data = buffer.char();
+ if (isWhitespace(data) || data === '/' || data === '>') {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ if (this._temporaryBuffer.toLowerCase() === 'script')
+ tokenizer.setState(script_data_double_escaped_state);
+ else
+ tokenizer.setState(script_data_escaped_state);
+ } else if (isAlpha(data)) {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ this._temporaryBuffer += data;
+ buffer.commit();
+ } else {
+ buffer.unget(data);
+ tokenizer.setState(script_data_escaped_state);
+ }
+ return true;
+ }
+
+ function script_data_double_escaped_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError('eof-in-script');
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer._emitToken({type: 'Characters', data: '-'});
+ tokenizer.setState(script_data_double_escaped_dash_state);
+ } else if (data === '<') {
+ tokenizer._emitToken({type: 'Characters', data: '<'});
+ tokenizer.setState(script_data_double_escaped_less_than_sign_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError('invalid-codepoint');
+ tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+ buffer.commit();
+ } else {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ buffer.commit();
+ }
+ return true;
+ }
+
+ function script_data_double_escaped_dash_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError('eof-in-script');
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer._emitToken({type: 'Characters', data: '-'});
+ tokenizer.setState(script_data_double_escaped_dash_dash_state);
+ } else if (data === '<') {
+ tokenizer._emitToken({type: 'Characters', data: '<'});
+ tokenizer.setState(script_data_double_escaped_less_than_sign_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError('invalid-codepoint');
+ tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+ tokenizer.setState(script_data_double_escaped_state);
+ } else {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ tokenizer.setState(script_data_double_escaped_state);
+ }
+ return true;
+ }
+
+ function script_data_double_escaped_dash_dash_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError('eof-in-script');
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer._emitToken({type: 'Characters', data: '-'});
+ buffer.commit();
+ } else if (data === '<') {
+ tokenizer._emitToken({type: 'Characters', data: '<'});
+ tokenizer.setState(script_data_double_escaped_less_than_sign_state);
+ } else if (data === '>') {
+ tokenizer._emitToken({type: 'Characters', data: '>'});
+ tokenizer.setState(script_data_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError('invalid-codepoint');
+ tokenizer._emitToken({type: 'Characters', data: '\uFFFD'});
+ tokenizer.setState(script_data_double_escaped_state);
+ } else {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ tokenizer.setState(script_data_double_escaped_state);
+ }
+ return true;
+ }
+
+ function script_data_double_escaped_less_than_sign_state(buffer) {
+ var data = buffer.char();
+ if (data === '/') {
+ tokenizer._emitToken({type: 'Characters', data: '/'});
+ this._temporaryBuffer = '';
+ tokenizer.setState(script_data_double_escape_end_state);
+ } else {
+ buffer.unget(data);
+ tokenizer.setState(script_data_double_escaped_state);
+ }
+ return true;
+ }
+
+ function script_data_double_escape_end_state(buffer) {
+ var data = buffer.char();
+ if (isWhitespace(data) || data === '/' || data === '>') {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ if (this._temporaryBuffer.toLowerCase() === 'script')
+ tokenizer.setState(script_data_escaped_state);
+ else
+ tokenizer.setState(script_data_double_escaped_state);
+ } else if (isAlpha(data)) {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ this._temporaryBuffer += data;
+ buffer.commit();
+ } else {
+ buffer.unget(data);
+ tokenizer.setState(script_data_double_escaped_state);
+ }
+ return true;
+ }
+
+ function tag_open_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("bare-less-than-sign-at-eof");
+ tokenizer._emitToken({type: 'Characters', data: '<'});
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isAlpha(data)) {
+ tokenizer._currentToken = {type: 'StartTag', name: data.toLowerCase(), data: []};
+ tokenizer.setState(tag_name_state);
+ } else if (data === '!') {
+ tokenizer.setState(markup_declaration_open_state);
+ } else if (data === '/') {
+ tokenizer.setState(close_tag_open_state);
+ } else if (data === '>') {
+ // XXX In theory it could be something besides a tag name. But
+ // do we really care?
+ tokenizer._parseError("expected-tag-name-but-got-right-bracket");
+ tokenizer._emitToken({type: 'Characters', data: "<>"});
+ tokenizer.setState(data_state);
+ } else if (data === '?') {
+ // XXX In theory it could be something besides a tag name. But
+ // do we really care?
+ tokenizer._parseError("expected-tag-name-but-got-question-mark");
+ buffer.unget(data);
+ tokenizer.setState(bogus_comment_state);
+ } else {
+ // XXX
+ tokenizer._parseError("expected-tag-name");
+ tokenizer._emitToken({type: 'Characters', data: "<"});
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ }
+ return true;
+ }
+
+ function close_tag_open_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("expected-closing-tag-but-got-eof");
+ tokenizer._emitToken({type: 'Characters', data: ''});
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isAlpha(data)) {
+ tokenizer._currentToken = {type: 'EndTag', name: data.toLowerCase(), data: []};
+ tokenizer.setState(tag_name_state);
+ } else if (data === '>') {
+ tokenizer._parseError("expected-closing-tag-but-got-right-bracket");
+ tokenizer.setState(data_state);
+ } else {
+ tokenizer._parseError("expected-closing-tag-but-got-char", {data: data}); // param 1 is datavars:
+ buffer.unget(data);
+ tokenizer.setState(bogus_comment_state);
+ }
+ return true;
+ }
+
+ function tag_name_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError('eof-in-tag-name');
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(before_attribute_name_state);
+ } else if (isAlpha(data)) {
+ tokenizer._currentToken.name += data.toLowerCase();
+ } else if (data === '>') {
+ tokenizer._emitCurrentToken();
+ } else if (data === '/') {
+ tokenizer.setState(self_closing_tag_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.name += "\uFFFD";
+ } else {
+ tokenizer._currentToken.name += data;
+ }
+ buffer.commit();
+
+ return true;
+ }
+
+ function before_attribute_name_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("expected-attribute-name-but-got-eof");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isWhitespace(data)) {
+ return true;
+ } else if (isAlpha(data)) {
+ tokenizer._currentToken.data.push({nodeName: data.toLowerCase(), nodeValue: ""});
+ tokenizer.setState(attribute_name_state);
+ } else if (data === '>') {
+ tokenizer._emitCurrentToken();
+ } else if (data === '/') {
+ tokenizer.setState(self_closing_tag_state);
+ } else if (data === "'" || data === '"' || data === '=' || data === '<') {
+ tokenizer._parseError("invalid-character-in-attribute-name");
+ tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
+ tokenizer.setState(attribute_name_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data.push({nodeName: "\uFFFD", nodeValue: ""});
+ } else {
+ tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
+ tokenizer.setState(attribute_name_state);
+ }
+ return true;
+ }
+
+ function attribute_name_state(buffer) {
+ var data = buffer.char();
+ var leavingThisState = true;
+ var shouldEmit = false;
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-attribute-name");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ shouldEmit = true;
+ } else if (data === '=') {
+ tokenizer.setState(before_attribute_value_state);
+ } else if (isAlpha(data)) {
+ tokenizer._currentAttribute().nodeName += data.toLowerCase();
+ leavingThisState = false;
+ } else if (data === '>') {
+ // XXX If we emit here the attributes are converted to a dict
+ // without being checked and when the code below runs we error
+ // because data is a dict not a list
+ shouldEmit = true;
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(after_attribute_name_state);
+ } else if (data === '/') {
+ tokenizer.setState(self_closing_tag_state);
+ } else if (data === "'" || data === '"') {
+ tokenizer._parseError("invalid-character-in-attribute-name");
+ tokenizer._currentAttribute().nodeName += data;
+ leavingThisState = false;
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentAttribute().nodeName += "\uFFFD";
+ } else {
+ tokenizer._currentAttribute().nodeName += data;
+ leavingThisState = false;
+ }
+
+ if (leavingThisState) {
+ // Attributes are not dropped at this stage. That happens when the
+ // start tag token is emitted so values can still be safely appended
+ // to attributes, but we do want to report the parse error in time.
+ var attributes = tokenizer._currentToken.data;
+ var currentAttribute = attributes[attributes.length - 1];
+ for (var i = attributes.length - 2; i >= 0; i--) {
+ if (currentAttribute.nodeName === attributes[i].nodeName) {
+ tokenizer._parseError("duplicate-attribute", {name: currentAttribute.nodeName});
+ currentAttribute.nodeName = null;
+ break;
+ }
+ }
+ if (shouldEmit)
+ tokenizer._emitCurrentToken();
+ } else {
+ buffer.commit();
+ }
+ return true;
+ }
+
+ function after_attribute_name_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("expected-end-of-tag-but-got-eof");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isWhitespace(data)) {
+ return true;
+ } else if (data === '=') {
+ tokenizer.setState(before_attribute_value_state);
+ } else if (data === '>') {
+ tokenizer._emitCurrentToken();
+ } else if (isAlpha(data)) {
+ tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
+ tokenizer.setState(attribute_name_state);
+ } else if (data === '/') {
+ tokenizer.setState(self_closing_tag_state);
+ } else if (data === "'" || data === '"' || data === '<') {
+ tokenizer._parseError("invalid-character-after-attribute-name");
+ tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
+ tokenizer.setState(attribute_name_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data.push({nodeName: "\uFFFD", nodeValue: ""});
+ } else {
+ tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
+ tokenizer.setState(attribute_name_state);
+ }
+ return true;
+ }
+
+ function before_attribute_value_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("expected-attribute-value-but-got-eof");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isWhitespace(data)) {
+ return true;
+ } else if (data === '"') {
+ tokenizer.setState(attribute_value_double_quoted_state);
+ } else if (data === '&') {
+ tokenizer.setState(attribute_value_unquoted_state);
+ buffer.unget(data);
+ } else if (data === "'") {
+ tokenizer.setState(attribute_value_single_quoted_state);
+ } else if (data === '>') {
+ tokenizer._parseError("expected-attribute-value-but-got-right-bracket");
+ tokenizer._emitCurrentToken();
+ } else if (data === '=' || data === '<' || data === '`') {
+ tokenizer._parseError("unexpected-character-in-unquoted-attribute-value");
+ tokenizer._currentAttribute().nodeValue += data;
+ tokenizer.setState(attribute_value_unquoted_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentAttribute().nodeValue += "\uFFFD";
+ } else {
+ tokenizer._currentAttribute().nodeValue += data;
+ tokenizer.setState(attribute_value_unquoted_state);
+ }
+
+ return true;
+ }
+
+ function attribute_value_double_quoted_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-attribute-value-double-quote");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '"') {
+ tokenizer.setState(after_attribute_value_state);
+ } else if (data === '&') {
+ this._additionalAllowedCharacter = '"';
+ tokenizer.setState(character_reference_in_attribute_value_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentAttribute().nodeValue += "\uFFFD";
+ } else {
+ var s = buffer.matchUntil('[\0"&]');
+ data = data + s;
+ tokenizer._currentAttribute().nodeValue += data;
+ }
+ return true;
+ }
+
+ function attribute_value_single_quoted_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-attribute-value-single-quote");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === "'") {
+ tokenizer.setState(after_attribute_value_state);
+ } else if (data === '&') {
+ this._additionalAllowedCharacter = "'";
+ tokenizer.setState(character_reference_in_attribute_value_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentAttribute().nodeValue += "\uFFFD";
+ } else {
+ tokenizer._currentAttribute().nodeValue += data + buffer.matchUntil("\u0000|['&]");
+ }
+ return true;
+ }
+
+ function attribute_value_unquoted_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-after-attribute-value");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(before_attribute_name_state);
+ } else if (data === '&') {
+ this._additionalAllowedCharacter = ">";
+ tokenizer.setState(character_reference_in_attribute_value_state);
+ } else if (data === '>') {
+ tokenizer._emitCurrentToken();
+ } else if (data === '"' || data === "'" || data === '=' || data === '`' || data === '<') {
+ tokenizer._parseError("unexpected-character-in-unquoted-attribute-value");
+ tokenizer._currentAttribute().nodeValue += data;
+ buffer.commit();
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentAttribute().nodeValue += "\uFFFD";
+ } else {
+ var o = buffer.matchUntil("\u0000|["+ "\t\n\v\f\x20\r" + "&<>\"'=`" +"]");
+ if (o === InputStream.EOF) {
+ tokenizer._parseError("eof-in-attribute-value-no-quotes");
+ tokenizer._emitCurrentToken();
+ }
+ // Commit here since this state is re-enterable and its outcome won't change with more data.
+ buffer.commit();
+ tokenizer._currentAttribute().nodeValue += data + o;
+ }
+ return true;
+ }
+
+ function character_reference_in_attribute_value_state(buffer) {
+ var character = EntityParser.consumeEntity(buffer, tokenizer, this._additionalAllowedCharacter);
+ this._currentAttribute().nodeValue += character || '&';
+ // We're supposed to switch back to the attribute value state that
+ // we were in when we were switched into this state. Rather than
+ // keeping track of this explictly, we observe that the previous
+ // state can be determined by additionalAllowedCharacter.
+ if (this._additionalAllowedCharacter === '"')
+ tokenizer.setState(attribute_value_double_quoted_state);
+ else if (this._additionalAllowedCharacter === '\'')
+ tokenizer.setState(attribute_value_single_quoted_state);
+ else if (this._additionalAllowedCharacter === '>')
+ tokenizer.setState(attribute_value_unquoted_state);
+ return true;
+ }
+
+ function after_attribute_value_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-after-attribute-value");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(before_attribute_name_state);
+ } else if (data === '>') {
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (data === '/') {
+ tokenizer.setState(self_closing_tag_state);
+ } else {
+ tokenizer._parseError("unexpected-character-after-attribute-value");
+ buffer.unget(data);
+ tokenizer.setState(before_attribute_name_state);
+ }
+ return true;
+ }
+
+ function self_closing_tag_state(buffer) {
+ var c = buffer.char();
+ if (c === InputStream.EOF) {
+ tokenizer._parseError("unexpected-eof-after-solidus-in-tag");
+ buffer.unget(c);
+ tokenizer.setState(data_state);
+ } else if (c === '>') {
+ tokenizer._currentToken.selfClosing = true;
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ tokenizer._parseError("unexpected-character-after-solidus-in-tag");
+ buffer.unget(c);
+ tokenizer.setState(before_attribute_name_state);
+ }
+ return true;
+ }
+
+ function bogus_comment_state(buffer) {
+ var data = buffer.matchUntil('>');
+ data = data.replace(/\u0000/g, "\uFFFD");
+ buffer.char();
+ tokenizer._emitToken({type: 'Comment', data: data});
+ tokenizer.setState(data_state);
+ return true;
+ }
+
+ function markup_declaration_open_state(buffer) {
+ var chars = buffer.shift(2);
+ if (chars === '--') {
+ tokenizer._currentToken = {type: 'Comment', data: ''};
+ tokenizer.setState(comment_start_state);
+ } else {
+ var newchars = buffer.shift(5);
+ if (newchars === InputStream.EOF || chars === InputStream.EOF) {
+ tokenizer._parseError("expected-dashes-or-doctype");
+ tokenizer.setState(bogus_comment_state);
+ buffer.unget(chars);
+ return true;
+ }
+
+ chars += newchars;
+ if (chars.toUpperCase() === 'DOCTYPE') {
+ tokenizer._currentToken = {type: 'Doctype', name: '', publicId: null, systemId: null, forceQuirks: false};
+ tokenizer.setState(doctype_state);
+ } else if (tokenizer._tokenHandler.isCdataSectionAllowed() && chars === '[CDATA[') {
+ tokenizer.setState(cdata_section_state);
+ } else {
+ tokenizer._parseError("expected-dashes-or-doctype");
+ buffer.unget(chars);
+ tokenizer.setState(bogus_comment_state);
+ }
+ }
+ return true;
+ }
+
+ function cdata_section_state(buffer) {
+ var data = buffer.matchUntil(']]>');
+ // skip ]]>
+ buffer.shift(3);
+ if (data) {
+ tokenizer._emitToken({type: 'Characters', data: data});
+ }
+ tokenizer.setState(data_state);
+ return true;
+ }
+
+ function comment_start_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-comment");
+ tokenizer._emitToken(tokenizer._currentToken);
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer.setState(comment_start_dash_state);
+ } else if (data === '>') {
+ tokenizer._parseError("incorrect-comment");
+ tokenizer._emitToken(tokenizer._currentToken);
+ tokenizer.setState(data_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data += "\uFFFD";
+ } else {
+ tokenizer._currentToken.data += data;
+ tokenizer.setState(comment_state);
+ }
+ return true;
+ }
+
+ function comment_start_dash_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-comment");
+ tokenizer._emitToken(tokenizer._currentToken);
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer.setState(comment_end_state);
+ } else if (data === '>') {
+ tokenizer._parseError("incorrect-comment");
+ tokenizer._emitToken(tokenizer._currentToken);
+ tokenizer.setState(data_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data += "\uFFFD";
+ } else {
+ tokenizer._currentToken.data += '-' + data;
+ tokenizer.setState(comment_state);
+ }
+ return true;
+ }
+
+ function comment_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-comment");
+ tokenizer._emitToken(tokenizer._currentToken);
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer.setState(comment_end_dash_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data += "\uFFFD";
+ } else {
+ tokenizer._currentToken.data += data;
+ buffer.commit();
+ }
+ return true;
+ }
+
+ function comment_end_dash_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-comment-end-dash");
+ tokenizer._emitToken(tokenizer._currentToken);
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer.setState(comment_end_state);
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data += "-\uFFFD";
+ tokenizer.setState(comment_state);
+ } else {
+ tokenizer._currentToken.data += '-' + data + buffer.matchUntil('\u0000|-');
+ // Consume the next character which is either a "-" or an :EOF as
+ // well so if there's a "-" directly after the "-" we go nicely to
+ // the "comment end state" without emitting a tokenizer._parseError there.
+ buffer.char();
+ }
+ return true;
+ }
+
+ function comment_end_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-comment-double-dash");
+ tokenizer._emitToken(tokenizer._currentToken);
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '>') {
+ tokenizer._emitToken(tokenizer._currentToken);
+ tokenizer.setState(data_state);
+ } else if (data === '!') {
+ tokenizer._parseError("unexpected-bang-after-double-dash-in-comment");
+ tokenizer.setState(comment_end_bang_state);
+ } else if (data === '-') {
+ tokenizer._parseError("unexpected-dash-after-double-dash-in-comment");
+ tokenizer._currentToken.data += data;
+ } else if (data === '\u0000') {
+ tokenizer._parseError("invalid-codepoint");
+ tokenizer._currentToken.data += "--\uFFFD";
+ tokenizer.setState(comment_state);
+ } else {
+ // XXX
+ tokenizer._parseError("unexpected-char-in-comment");
+ tokenizer._currentToken.data += '--' + data;
+ tokenizer.setState(comment_state);
+ }
+ return true;
+ }
+
+ function comment_end_bang_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-comment-end-bang-state");
+ tokenizer._emitToken(tokenizer._currentToken);
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ } else if (data === '>') {
+ tokenizer._emitToken(tokenizer._currentToken);
+ tokenizer.setState(data_state);
+ } else if (data === '-') {
+ tokenizer._currentToken.data += '--!';
+ tokenizer.setState(comment_end_dash_state);
+ } else {
+ tokenizer._currentToken.data += '--!' + data;
+ tokenizer.setState(comment_state);
+ }
+ return true;
+ }
+
+ function doctype_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("expected-doctype-name-but-got-eof");
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(before_doctype_name_state);
+ } else {
+ tokenizer._parseError("need-space-after-doctype");
+ buffer.unget(data);
+ tokenizer.setState(before_doctype_name_state);
+ }
+ return true;
+ }
+
+ function before_doctype_name_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("expected-doctype-name-but-got-eof");
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isWhitespace(data)) {
+ // pass
+ } else if (data === '>') {
+ tokenizer._parseError("expected-doctype-name-but-got-right-bracket");
+ tokenizer._currentToken.forceQuirks = true;
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ if (isAlpha(data))
+ data = data.toLowerCase();
+ tokenizer._currentToken.name = data;
+ tokenizer.setState(doctype_name_state);
+ }
+ return true;
+ }
+
+ function doctype_name_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer._parseError("eof-in-doctype-name");
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(after_doctype_name_state);
+ } else if (data === '>') {
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ if (isAlpha(data))
+ data = data.toLowerCase();
+ tokenizer._currentToken.name += data;
+ buffer.commit();
+ }
+ return true;
+ }
+
+ function after_doctype_name_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer._parseError("eof-in-doctype");
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isWhitespace(data)) {
+ // pass
+ } else if (data === '>') {
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ if (['p', 'P'].indexOf(data) > -1) {
+ var expected = [['u', 'U'], ['b', 'B'], ['l', 'L'], ['i', 'I'], ['c', 'C']];
+ var matched = expected.every(function(expected){
+ data = buffer.char();
+ return expected.indexOf(data) > -1;
+ });
+ if (matched) {
+ tokenizer.setState(after_doctype_public_keyword_state);
+ return true;
+ }
+ } else if (['s', 'S'].indexOf(data) > -1) {
+ var expected = [['y', 'Y'], ['s', 'S'], ['t', 'T'], ['e', 'E'], ['m', 'M']];
+ var matched = expected.every(function(expected){
+ data = buffer.char();
+ return expected.indexOf(data) > -1;
+ });
+ if (matched) {
+ tokenizer.setState(after_doctype_system_keyword_state);
+ return true;
+ }
+ }
+
+ // All the characters read before the current 'data' will be
+ // [a-zA-Z], so they're garbage in the bogus doctype and can be
+ // discarded; only the latest character might be '>' or EOF
+ // and needs to be ungetted
+ buffer.unget(data);
+ tokenizer._currentToken.forceQuirks = true;
+
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-doctype");
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ tokenizer._parseError("expected-space-or-right-bracket-in-doctype", {data: data});
+ tokenizer.setState(bogus_doctype_state);
+ }
+ }
+ return true;
+ }
+
+ function after_doctype_public_keyword_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isWhitespace(data)) {
+ tokenizer.setState(before_doctype_public_identifier_state);
+ } else if (data === "'" || data === '"') {
+ tokenizer._parseError("unexpected-char-in-doctype");
+ buffer.unget(data);
+ tokenizer.setState(before_doctype_public_identifier_state);
+ } else {
+ buffer.unget(data);
+ tokenizer.setState(before_doctype_public_identifier_state);
+ }
+ return true;
+ }
+
+ function before_doctype_public_identifier_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (isWhitespace(data)) {
+ // pass
+ } else if (data === '"') {
+ tokenizer._currentToken.publicId = '';
+ tokenizer.setState(doctype_public_identifier_double_quoted_state);
+ } else if (data === "'") {
+ tokenizer._currentToken.publicId = '';
+ tokenizer.setState(doctype_public_identifier_single_quoted_state);
+ } else if (data === '>') {
+ tokenizer._parseError("unexpected-end-of-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ tokenizer._parseError("unexpected-char-in-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ tokenizer.setState(bogus_doctype_state);
+ }
+ return true;
+ }
+
+ function doctype_public_identifier_double_quoted_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (data === '"') {
+ tokenizer.setState(after_doctype_public_identifier_state);
+ } else if (data === '>') {
+ tokenizer._parseError("unexpected-end-of-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ tokenizer._currentToken.publicId += data;
+ }
+ return true;
+ }
+
+ function doctype_public_identifier_single_quoted_state(buffer) {
+ var data = buffer.char();
+ if (data === InputStream.EOF) {
+ tokenizer._parseError("eof-in-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ buffer.unget(data);
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else if (data === "'") {
+ tokenizer.setState(after_doctype_public_identifier_state);
+ } else if (data === '>') {
+ tokenizer._parseError("unexpected-end-of-doctype");
+ tokenizer._currentToken.forceQuirks = true;
+ tokenizer.setState(data_state);
+ tokenizer._emitCurrentToken();
+ } else {
+ tokenizer._currentToken.publicId += data;
+ }
+ return true;
+ }
+
+ // After DOCTYPE public identifier state. Whitespace moves on to the state
+ // between the two identifiers, '>' emits the token, and a quote that follows
+ // the public identifier without whitespace is reported as a parse error but
+ // still starts the system identifier. Always returns true.
+ function after_doctype_public_identifier_state(buffer) {
+     var ch = buffer.char();
+     if (ch === InputStream.EOF) {
+         tokenizer._parseError("eof-in-doctype");
+         tokenizer._currentToken.forceQuirks = true;
+         tokenizer._emitCurrentToken();
+         buffer.unget(ch);
+         tokenizer.setState(data_state);
+         return true;
+     }
+     if (isWhitespace(ch)) {
+         tokenizer.setState(between_doctype_public_and_system_identifiers_state);
+         return true;
+     }
+     if (ch === '>') {
+         tokenizer.setState(data_state);
+         tokenizer._emitCurrentToken();
+         return true;
+     }
+     if (ch === '"' || ch === "'") {
+         tokenizer._parseError("unexpected-char-in-doctype");
+         tokenizer._currentToken.systemId = '';
+         tokenizer.setState(ch === '"'
+             ? doctype_system_identifier_double_quoted_state
+             : doctype_system_identifier_single_quoted_state);
+         return true;
+     }
+     // Anything else makes the DOCTYPE bogus.
+     tokenizer._parseError("unexpected-char-in-doctype");
+     tokenizer._currentToken.forceQuirks = true;
+     tokenizer.setState(bogus_doctype_state);
+     return true;
+ }
+
+ // Between DOCTYPE public and system identifiers state. Skips whitespace,
+ // emits the token on '>', starts a system identifier on a quote, and treats
+ // any other character as a bogus DOCTYPE. Always returns true.
+ function between_doctype_public_and_system_identifiers_state(buffer) {
+     var ch = buffer.char();
+     if (ch === InputStream.EOF) {
+         tokenizer._parseError("eof-in-doctype");
+         tokenizer._currentToken.forceQuirks = true;
+         tokenizer._emitCurrentToken();
+         buffer.unget(ch);
+         tokenizer.setState(data_state);
+         return true;
+     }
+     if (isWhitespace(ch)) {
+         // Whitespace is ignored; stay in this state.
+         return true;
+     }
+     if (ch === '>') {
+         tokenizer._emitCurrentToken();
+         tokenizer.setState(data_state);
+         return true;
+     }
+     if (ch === '"' || ch === "'") {
+         tokenizer._currentToken.systemId = '';
+         tokenizer.setState(ch === '"'
+             ? doctype_system_identifier_double_quoted_state
+             : doctype_system_identifier_single_quoted_state);
+         return true;
+     }
+     tokenizer._parseError("unexpected-char-in-doctype");
+     tokenizer._currentToken.forceQuirks = true;
+     tokenizer.setState(bogus_doctype_state);
+     return true;
+ }
+
+ // After DOCTYPE system keyword state. Whitespace is consumed; any other
+ // character is pushed back and handled by the "before system identifier"
+ // state, with a parse error reported first when that character is a quote.
+ // Always returns true.
+ function after_doctype_system_keyword_state(buffer) {
+     var ch = buffer.char();
+     if (ch === InputStream.EOF) {
+         tokenizer._parseError("eof-in-doctype");
+         tokenizer._currentToken.forceQuirks = true;
+         tokenizer._emitCurrentToken();
+         buffer.unget(ch);
+         tokenizer.setState(data_state);
+         return true;
+     }
+     if (isWhitespace(ch)) {
+         tokenizer.setState(before_doctype_system_identifier_state);
+         return true;
+     }
+     if (ch === "'" || ch === '"') {
+         tokenizer._parseError("unexpected-char-in-doctype");
+     }
+     // Reprocess the character in the next state.
+     buffer.unget(ch);
+     tokenizer.setState(before_doctype_system_identifier_state);
+     return true;
+ }
+
+ // Before DOCTYPE system identifier state. Skips whitespace, starts a system
+ // identifier on a quote, reports a premature '>' and treats any other
+ // character as a bogus DOCTYPE. Always returns true.
+ function before_doctype_system_identifier_state(buffer) {
+     var ch = buffer.char();
+     if (ch === InputStream.EOF) {
+         tokenizer._parseError("eof-in-doctype");
+         tokenizer._currentToken.forceQuirks = true;
+         tokenizer._emitCurrentToken();
+         buffer.unget(ch);
+         tokenizer.setState(data_state);
+         return true;
+     }
+     if (isWhitespace(ch)) {
+         // Whitespace is ignored; stay in this state.
+         return true;
+     }
+     if (ch === '"' || ch === "'") {
+         tokenizer._currentToken.systemId = '';
+         tokenizer.setState(ch === '"'
+             ? doctype_system_identifier_double_quoted_state
+             : doctype_system_identifier_single_quoted_state);
+         return true;
+     }
+     if (ch === '>') {
+         tokenizer._parseError("unexpected-end-of-doctype");
+         tokenizer._currentToken.forceQuirks = true;
+         tokenizer._emitCurrentToken();
+         tokenizer.setState(data_state);
+         return true;
+     }
+     tokenizer._parseError("unexpected-char-in-doctype");
+     tokenizer._currentToken.forceQuirks = true;
+     tokenizer.setState(bogus_doctype_state);
+     return true;
+ }
+
+ // DOCTYPE system identifier (double-quoted) state. Appends each character to
+ // the current token's systemId until the closing '"'. EOF and a premature '>'
+ // both mark the token force-quirks, emit it and return to the data state.
+ // Always returns true.
+ function doctype_system_identifier_double_quoted_state(buffer) {
+     var ch = buffer.char();
+     switch (ch) {
+         case InputStream.EOF:
+             tokenizer._parseError("eof-in-doctype");
+             tokenizer._currentToken.forceQuirks = true;
+             tokenizer._emitCurrentToken();
+             buffer.unget(ch);
+             tokenizer.setState(data_state);
+             break;
+         case '"':
+             tokenizer.setState(after_doctype_system_identifier_state);
+             break;
+         case '>':
+             tokenizer._parseError("unexpected-end-of-doctype");
+             tokenizer._currentToken.forceQuirks = true;
+             tokenizer._emitCurrentToken();
+             tokenizer.setState(data_state);
+             break;
+         default:
+             tokenizer._currentToken.systemId += ch;
+     }
+     return true;
+ }
+
+ // DOCTYPE system identifier (single-quoted) state. Same handling as the
+ // double-quoted variant, except that "'" terminates the identifier.
+ // Always returns true.
+ function doctype_system_identifier_single_quoted_state(buffer) {
+     var ch = buffer.char();
+     switch (ch) {
+         case InputStream.EOF:
+             tokenizer._parseError("eof-in-doctype");
+             tokenizer._currentToken.forceQuirks = true;
+             tokenizer._emitCurrentToken();
+             buffer.unget(ch);
+             tokenizer.setState(data_state);
+             break;
+         case "'":
+             tokenizer.setState(after_doctype_system_identifier_state);
+             break;
+         case '>':
+             tokenizer._parseError("unexpected-end-of-doctype");
+             tokenizer._currentToken.forceQuirks = true;
+             tokenizer._emitCurrentToken();
+             tokenizer.setState(data_state);
+             break;
+         default:
+             tokenizer._currentToken.systemId += ch;
+     }
+     return true;
+ }
+
+ // After DOCTYPE system identifier state. Skips whitespace and emits the token
+ // on '>'. Any other character is a parse error that switches to the bogus
+ // DOCTYPE state; the force-quirks flag is left untouched in that case.
+ // Always returns true.
+ function after_doctype_system_identifier_state(buffer) {
+     var ch = buffer.char();
+     if (ch === InputStream.EOF) {
+         tokenizer._parseError("eof-in-doctype");
+         tokenizer._currentToken.forceQuirks = true;
+         tokenizer._emitCurrentToken();
+         buffer.unget(ch);
+         tokenizer.setState(data_state);
+         return true;
+     }
+     if (isWhitespace(ch)) {
+         // Whitespace is ignored; stay in this state.
+         return true;
+     }
+     if (ch === '>') {
+         tokenizer._emitCurrentToken();
+         tokenizer.setState(data_state);
+         return true;
+     }
+     tokenizer._parseError("unexpected-char-in-doctype");
+     tokenizer.setState(bogus_doctype_state);
+     return true;
+ }
+
+ // Bogus DOCTYPE state. Discards characters until '>' or EOF, then emits the
+ // current token and returns to the data state. EOF is pushed back first so
+ // the data state sees it too. Always returns true.
+ function bogus_doctype_state(buffer) {
+     var ch = buffer.char();
+     var atEof = ch === InputStream.EOF;
+     if (atEof || ch === '>') {
+         if (atEof) {
+             buffer.unget(ch);
+         }
+         tokenizer._emitCurrentToken();
+         tokenizer.setState(data_state);
+     }
+     return true;
+ }
+};
+
+exports.Tokenizer = Tokenizer;
+
+},{"./EntityParser":2,"./InputStream":3}],6:[function(require,module,exports){
+(function(){var assert = require('assert');
+
+var messages = require('./messages.json');
+var constants = require('./constants');
+
+var EventEmitter = require('events').EventEmitter;
+
+var Tokenizer = require('./Tokenizer').Tokenizer;
+var ElementStack = require('./ElementStack').ElementStack;
+var StackItem = require('./StackItem').StackItem;
+
+var Marker = {};
+
+/**
+ * Tells whether `ch` is exactly one of the five characters treated as
+ * whitespace here: space, line feed, tab, carriage return or form feed.
+ * @param {*} ch value to test (compared with strict equality)
+ * @returns {boolean}
+ */
+function isWhitespace(ch) {
+    switch (ch) {
+        case " ":
+        case "\n":
+        case "\t":
+        case "\r":
+        case "\f":
+            return true;
+        default:
+            return false;
+    }
+}
+
+/**
+ * Tells whether `ch` is a whitespace character (per isWhitespace) or the
+ * Unicode replacement character U+FFFD.
+ * @param {*} ch value to test
+ * @returns {boolean}
+ */
+function isWhitespaceOrReplacementCharacter(ch) {
+    if (ch === '\uFFFD') {
+        return true;
+    }
+    return isWhitespace(ch);
+}
+
+/**
+ * Tells whether every indexed element of `characters` is whitespace.
+ * An empty input yields true.
+ * @param {string|Array} characters indexable sequence with a `length`
+ * @returns {boolean}
+ */
+function isAllWhitespace(characters) {
+    var index = 0;
+    while (index < characters.length) {
+        if (!isWhitespace(characters[index])) {
+            return false;
+        }
+        index++;
+    }
+    return true;
+}
+
+/**
+ * Tells whether every indexed element of `characters` is whitespace or the
+ * replacement character U+FFFD. An empty input yields true.
+ * @param {string|Array} characters indexable sequence with a `length`
+ * @returns {boolean}
+ */
+function isAllWhitespaceOrReplacementCharacters(characters) {
+    var index = 0;
+    while (index < characters.length) {
+        if (!isWhitespaceOrReplacementCharacter(characters[index])) {
+            return false;
+        }
+        index++;
+    }
+    return true;
+}
+
+/**
+ * Finds the first entry of `node.attributes` whose `nodeName` equals `name`.
+ * @param {{attributes: Array}} node object carrying an `attributes` list
+ * @param {string} name attribute name to look for (strict comparison)
+ * @returns {Object|null} the matching attribute entry, or null if none
+ */
+function getAttribute(node, name) {
+    var index = 0;
+    while (index < node.attributes.length) {
+        var candidate = node.attributes[index];
+        if (candidate.nodeName === name) {
+            return candidate;
+        }
+        index++;
+    }
+    return null;
+}
+
+/**
+ * Cursor over a run of characters.
+ * `current` is the read position (starts at 0) and `end` is the length of
+ * `characters` at construction time.
+ * @constructor
+ * @param {string} characters the text to wrap
+ */
+function CharacterBuffer(characters) {
+    this.characters = characters;
+    this.current = 0;
+    this.end = characters.length;
+}
+
+/**
+ * Advances the cursor by one position if the character under it is a line
+ * feed; otherwise leaves the cursor where it is.
+ */
+CharacterBuffer.prototype.skipAtMostOneLeadingNewline = function() {
+    var next = this.characters[this.current];
+    if (next === '\n') {
+        this.current++;
+    }
+};
+
+/**
+ * Advances the cursor past the whitespace characters that start at the
+ * current position, stopping at the first non-whitespace character or when
+ * the cursor reaches `end`.
+ */
+CharacterBuffer.prototype.skipLeadingWhitespace = function() {
+    for (;;) {
+        if (!isWhitespace(this.characters[this.current])) {
+            return;
+        }
+        this.current += 1;
+        if (this.current == this.end) {
+            return;
+        }
+    }
+};
+
+/**
+ * Advances the cursor past the non-whitespace characters that start at the
+ * current position, stopping at the first whitespace character or at `end`.
+ *
+ * The bound is checked before each character is read. The previous version
+ * only tested `++current == end` after incrementing, so an empty or already
+ * exhausted buffer (current >= end on entry) made it loop forever: reading
+ * past the end yields undefined, which is never whitespace.
+ */
+CharacterBuffer.prototype.skipLeadingNonWhitespace = function() {
+    while (this.current < this.end && !isWhitespace(this.characters[this.current])) {
+        this.current++;
+    }
+};
+
+/**
+ * Returns the text from the cursor to the end of `characters`.
+ * The cursor itself is not moved.
+ * @returns {string}
+ */
+CharacterBuffer.prototype.takeRemaining = function() {
+    var from = this.current;
+    return this.characters.substring(from);
+};
+
+/**
+ * Consumes the whitespace at the cursor and returns it.
+ *
+ * The cursor is advanced by skipLeadingWhitespace(); the returned text is the
+ * span between the old and the new cursor position.
+ *
+ * String.prototype.substring takes an END INDEX as its second argument, not
+ * a length. The previous code passed `this.current - start`, which returned
+ * the wrong slice whenever the cursor was not at position 0.
+ *
+ * @returns {string} the skipped whitespace, or "" if there was none
+ */
+CharacterBuffer.prototype.takeLeadingWhitespace = function() {
+    var start = this.current;
+    this.skipLeadingWhitespace();
+    if (start === this.current)
+        return "";
+    return this.characters.substring(start, this.current);
+};
+
+// Read-only `length` accessor: the distance from the cursor to `end`.
+Object.defineProperty(CharacterBuffer.prototype, 'length', {
+    get: function() {
+        var consumed = this.current;
+        return this.end - consumed;
+    }
+});
+
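+/**
+ * Creates a tree builder with its parsing state set to defaults and defines
+ * the insertion-mode handler objects (exposed as `this.insertionModes`).
+ * @constructor
+ */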
+function TreeBuilder() {
+ this.tokenizer = null;
+ this.errorHandler = null;
+ this.scriptingEnabled = false;
+ this.document = null;
+ this.head = null;
+ this.form = null;
+ this.openElements = new ElementStack();
+ this.activeFormattingElements = [];
+ this.insertionMode = null;
+ this.insertionModeName = "";
+ this.originalInsertionMode = "";
+ this.inQuirksMode = false; // TODO quirks mode
+ this.compatMode = "no quirks";
+ this.framesetOk = true;
+ this.redirectAttachToFosterParent = false;
+ this.selfClosingFlagAcknowledged = false;
+ this.context = "";
+ this.firstStartTag = false;
+ this.pendingTableCharacters = [];
+ this.shouldSkipLeadingNewline = false;
+
+ var tree = this;
+ var modes = this.insertionModes = {};
+ // Default handlers that later insertion modes inherit through Object.create
+ // and override where needed.
+ modes.base = {
+     // Tag name -> handler method name; "-default" is the fallback entry.
+     end_tag_handlers: {"-default": 'endTagOther'},
+     start_tag_handlers: {"-default": 'startTagOther'},
+
+     // End of input: generate implied end tags, then report an error if
+     // elements other than html (and body) are still open.
+     processEOF: function() {
+         tree.generateImpliedEndTags();
+         var openCount = tree.openElements.length;
+         // The two-element case with a non-body second element happens for
+         // framesets or something similar.
+         var unclosed = openCount > 2 ||
+             (openCount == 2 && tree.openElements.item(1).localName != 'body');
+         if (unclosed) {
+             tree.parseError('expected-closing-tag-but-got-eof');
+         }
+         // XXX With a fragment context and more than one open element,
+         // nothing is reported ('eof-in-innerhtml' is deliberately not raised).
+         // This is not what the specification says.
+     },
+
+     // Default: attach the comment to the node of the current stack item.
+     // Modes that need a different parent override this.
+     processComment: function(data) {
+         var parent = tree.currentStackItem().node;
+         tree.insertComment(data, parent);
+     },
+
+     // A DOCTYPE is unexpected everywhere except where a mode overrides this.
+     processDoctype: function(name, publicId, systemId, forceQuirks) {
+         tree.parseError('unexpected-doctype');
+     },
+
+     // Dispatch a start tag to the handler registered for its name, falling
+     // back to the "-default" handler; throws if neither method exists.
+     processStartTag: function(name, attributes, selfClosing) {
+         var table = this.start_tag_handlers;
+         var method = this[table[name]] ? table[name] : table["-default"];
+         if (!this[method]) {
+             throw(new Error("No handler found for "+name));
+         }
+         this[method](name, attributes, selfClosing);
+     },
+
+     // Dispatch an end tag the same way, using end_tag_handlers.
+     processEndTag: function(name) {
+         var table = this.end_tag_handlers;
+         var method = this[table[name]] ? table[name] : table["-default"];
+         if (!this[method]) {
+             throw(new Error("No handler found for "+name));
+         }
+         this[method](name);
+     },
+
+     // An <html> start tag that is not the first start tag is an error; its
+     // attributes are merged into the root element either way.
+     startTagHtml: function(name, attributes) {
+         var isLateHtmlTag = !tree.firstStartTag && name == 'html';
+         if (isLateHtmlTag) {
+             tree.parseError('non-html-root');
+         }
+         tree.addAttributesToElement(tree.openElements.rootNode, attributes);
+         tree.firstStartTag = false;
+     }
+ };
+
+ // "initial" insertion mode: entered before any DOCTYPE has been seen.
+ modes.initial = Object.create(modes.base);
+
+ // EOF before a DOCTYPE: report it, fall back to quirks handling via
+ // anythingElse(), then let the newly selected mode process the EOF.
+ modes.initial.processEOF = function() {
+     tree.parseError("expected-doctype-but-got-eof");
+     this.anythingElse();
+     var nextMode = tree.insertionMode;
+     nextMode.processEOF();
+ };
+
+ // Comments at this point are attached to the document itself.
+ modes.initial.processComment = function(data) {
+     var parent = tree.document;
+     tree.insertComment(data, parent);
+ };
+
+ /**
+  * Handles a DOCTYPE token in the "initial" insertion mode: inserts the
+  * doctype, derives tree.compatMode ("quirks" or "limited quirks"; otherwise
+  * left unchanged) from the name and identifiers, reports the matching parse
+  * error and switches to the 'beforeHTML' insertion mode.
+  *
+  * Public identifiers are compared case-insensitively.
+  *
+  * Fixes relative to the previous version:
+  *  - The HTML 4.01 Transitional/Frameset public identifiers combined with a
+  *    system identifier are now matched as PREFIXES for limited-quirks mode.
+  *    They used to be compared with an exact indexOf() against the prefix
+  *    strings, which never matches a real public identifier.
+  *  - "html" is only an exact-match quirks identifier; it is no longer also
+  *    treated as a prefix.
+  *  - Duplicate entries were dropped from the prefix list (no effect on
+  *    matching).
+  *
+  * @param {string} name doctype name
+  * @param {?string} publicId public identifier, null/undefined when missing
+  * @param {?string} systemId system identifier, null/undefined when missing
+  * @param {boolean} forceQuirks force-quirks flag of the token
+  */
+ modes.initial.processDoctype = function(name, publicId, systemId, forceQuirks) {
+     tree.insertDoctype(name || '', publicId || '', systemId || '');
+
+     var hasPublicId = publicId != null;
+     var hasSystemId = systemId != null;
+     var lowerPublicId = hasPublicId ? publicId.toLowerCase() : null;
+
+     // Only called when a public identifier is present.
+     function publicIdStartsWith(prefix) {
+         return lowerPublicId.indexOf(prefix) === 0;
+     }
+
+     // Public identifier prefixes that always select quirks mode.
+     var quirksPrefixes = [
+         "+//silmaril//dtd html pro v0r11 19970101//",
+         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+         "-//as//dtd html 3.0 aswedit + extensions//",
+         "-//ietf//dtd html 2.0 level 1//",
+         "-//ietf//dtd html 2.0 level 2//",
+         "-//ietf//dtd html 2.0 strict level 1//",
+         "-//ietf//dtd html 2.0 strict level 2//",
+         "-//ietf//dtd html 2.0 strict//",
+         "-//ietf//dtd html 2.0//",
+         "-//ietf//dtd html 2.1e//",
+         "-//ietf//dtd html 3.0//",
+         "-//ietf//dtd html 3.2 final//",
+         "-//ietf//dtd html 3.2//",
+         "-//ietf//dtd html 3//",
+         "-//ietf//dtd html level 0//",
+         "-//ietf//dtd html level 1//",
+         "-//ietf//dtd html level 2//",
+         "-//ietf//dtd html level 3//",
+         "-//ietf//dtd html strict level 0//",
+         "-//ietf//dtd html strict level 1//",
+         "-//ietf//dtd html strict level 2//",
+         "-//ietf//dtd html strict level 3//",
+         "-//ietf//dtd html strict//",
+         "-//ietf//dtd html//",
+         "-//metrius//dtd metrius presentational//",
+         "-//microsoft//dtd internet explorer 2.0 html strict//",
+         "-//microsoft//dtd internet explorer 2.0 html//",
+         "-//microsoft//dtd internet explorer 2.0 tables//",
+         "-//microsoft//dtd internet explorer 3.0 html strict//",
+         "-//microsoft//dtd internet explorer 3.0 html//",
+         "-//microsoft//dtd internet explorer 3.0 tables//",
+         "-//netscape comm. corp.//dtd html//",
+         "-//netscape comm. corp.//dtd strict html//",
+         "-//o'reilly and associates//dtd html 2.0//",
+         "-//o'reilly and associates//dtd html extended 1.0//",
+         "-//spyglass//dtd html 2.0 extended//",
+         "-//sq//dtd html 2.0 hotmetal + extensions//",
+         "-//sun microsystems corp.//dtd hotjava html//",
+         "-//sun microsystems corp.//dtd hotjava strict html//",
+         "-//w3c//dtd html 3 1995-03-24//",
+         "-//w3c//dtd html 3.2 draft//",
+         "-//w3c//dtd html 3.2 final//",
+         "-//w3c//dtd html 3.2//",
+         "-//w3c//dtd html 3.2s draft//",
+         "-//w3c//dtd html 4.0 frameset//",
+         "-//w3c//dtd html 4.0 transitional//",
+         "-//w3c//dtd html experimental 19960712//",
+         "-//w3c//dtd html experimental 970421//",
+         "-//w3c//dtd w3 html//",
+         "-//w3o//dtd w3 html 3.0//",
+         "-//webtechs//dtd mozilla html 2.0//",
+         "-//webtechs//dtd mozilla html//"
+     ];
+     // Public identifiers that select quirks mode on an exact match.
+     var quirksExactIds = [
+         "-//w3o//dtd w3 html strict 3.0//en//",
+         "-/w3c/dtd html 4.0 transitional/en",
+         "html"
+     ];
+     // Quirks when the system identifier is missing, limited quirks otherwise.
+     var html401Prefixes = [
+         "-//w3c//dtd html 4.01 transitional//",
+         "-//w3c//dtd html 4.01 frameset//"
+     ];
+     // Always limited quirks.
+     var xhtml10Prefixes = [
+         "-//w3c//dtd xhtml 1.0 transitional//",
+         "-//w3c//dtd xhtml 1.0 frameset//"
+     ];
+
+     var isQuirks = forceQuirks || name != 'html'
+         || (hasPublicId && (
+             quirksPrefixes.some(publicIdStartsWith)
+             || quirksExactIds.indexOf(lowerPublicId) > -1
+             || (!hasSystemId && html401Prefixes.some(publicIdStartsWith))))
+         || (hasSystemId && systemId.toLowerCase() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd");
+
+     if (isQuirks) {
+         tree.compatMode = "quirks";
+         tree.parseError("quirky-doctype");
+     } else if (hasPublicId && (
+             xhtml10Prefixes.some(publicIdStartsWith)
+             || (hasSystemId && html401Prefixes.some(publicIdStartsWith)))) {
+         tree.compatMode = "limited quirks";
+         tree.parseError("almost-standards-doctype");
+     } else {
+         // Obsolete but permitted doctypes are accepted without an error
+         // (an "obsolete-doctype" warning could be raised here).
+         var isObsoletePermitted =
+             (publicId == "-//W3C//DTD HTML 4.0//EN" && (systemId == null || systemId == "http://www.w3.org/TR/REC-html40/strict.dtd"))
+             || (publicId == "-//W3C//DTD HTML 4.01//EN" && (systemId == null || systemId == "http://www.w3.org/TR/html4/strict.dtd"))
+             || (publicId == "-//W3C//DTD XHTML 1.0 Strict//EN" && (systemId == "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"))
+             || (publicId == "-//W3C//DTD XHTML 1.1//EN" && (systemId == "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"));
+         // No public identifier and no system identifier (or the
+         // legacy-compat one): nothing to report.
+         var isPlainHtmlDoctype = publicId == null
+             && (systemId == null || systemId == "about:legacy-compat");
+         if (!isObsoletePermitted && !isPlainHtmlDoctype) {
+             tree.parseError("unknown-doctype");
+         }
+     }
+     tree.setInsertionMode('beforeHTML');
+ };
+
+ // Leading whitespace is dropped. Any other text before a DOCTYPE is an
+ // error and is reprocessed by the mode selected by anythingElse().
+ modes.initial.processCharacters = function(buffer) {
+     buffer.skipLeadingWhitespace();
+     if (buffer.length) {
+         tree.parseError('expected-doctype-but-got-chars');
+         this.anythingElse();
+         var nextMode = tree.insertionMode;
+         nextMode.processCharacters(buffer);
+     }
+ };
+
+ // A start tag before a DOCTYPE: report it and reprocess in the next mode.
+ modes.initial.processStartTag = function(name, attributes, selfClosing) {
+     tree.parseError('expected-doctype-but-got-start-tag', {name: name});
+     this.anythingElse();
+     var nextMode = tree.insertionMode;
+     nextMode.processStartTag(name, attributes, selfClosing);
+ };
+
+ // An end tag before a DOCTYPE: report it and reprocess in the next mode.
+ modes.initial.processEndTag = function(name) {
+     tree.parseError('expected-doctype-but-got-end-tag', {name: name});
+     this.anythingElse();
+     var nextMode = tree.insertionMode;
+     nextMode.processEndTag(name);
+ };
+
+ // No DOCTYPE was found: switch to quirks mode and move to 'beforeHTML'.
+ modes.initial.anythingElse = function() {
+     tree.compatMode = 'quirks';
+     tree.setInsertionMode('beforeHTML');
+ };
+
+ // "beforeHTML" insertion mode: waiting for the root <html> element.
+ modes.beforeHTML = Object.create(modes.base);
+
+ modes.beforeHTML.start_tag_handlers = {
+     html: 'startTagHtml',
+     '-default': 'startTagOther'
+ };
+
+ // EOF: create the html element implicitly, then let the next mode see EOF.
+ modes.beforeHTML.processEOF = function() {
+     this.anythingElse();
+     var nextMode = tree.insertionMode;
+     nextMode.processEOF();
+ };
+
+ // Comments at this point are attached to the document itself.
+ modes.beforeHTML.processComment = function(data) {
+     var parent = tree.document;
+     tree.insertComment(data, parent);
+ };
+
+ // Leading whitespace is dropped; remaining text creates the html element
+ // implicitly and is reprocessed by the next mode.
+ modes.beforeHTML.processCharacters = function(buffer) {
+     buffer.skipLeadingWhitespace();
+     if (buffer.length) {
+         this.anythingElse();
+         var nextMode = tree.insertionMode;
+         nextMode.processCharacters(buffer);
+     }
+ };
+
+ // An explicit <html> start tag: remember that it was the first start tag,
+ // insert the element with its attributes and move to 'beforeHead'.
+ modes.beforeHTML.startTagHtml = function(name, attributes, selfClosing) {
+     tree.firstStartTag = true;
+     tree.insertHtmlElement(attributes);
+     tree.setInsertionMode('beforeHead');
+ };
+
+ // Any other start tag: create the html element, then reprocess the tag.
+ modes.beforeHTML.startTagOther = function(name, attributes, selfClosing) {
+     this.anythingElse();
+     var nextMode = tree.insertionMode;
+     nextMode.processStartTag(name, attributes, selfClosing);
+ };
+
+ // Any end tag: create the html element, then reprocess the tag.
+ modes.beforeHTML.processEndTag = function(name) {
+     this.anythingElse();
+     var nextMode = tree.insertionMode;
+     nextMode.processEndTag(name);
+ };
+
+ // Insert an html element without attributes and move to 'beforeHead'.
+ modes.beforeHTML.anythingElse = function() {
+     tree.insertHtmlElement();
+     tree.setInsertionMode('beforeHead');
+ };
+
+ // "afterAfterBody" insertion mode.
+ modes.afterAfterBody = Object.create(modes.base);
+
+ modes.afterAfterBody.start_tag_handlers = {
+     html: 'startTagHtml',
+     '-default': 'startTagOther'
+ };
+
+ // Comments at this point are attached to the document itself.
+ modes.afterAfterBody.processComment = function(data) {
+     var parent = tree.document;
+     tree.insertComment(data, parent);
+ };
+
+ // DOCTYPE and <html> start tags are delegated to the inBody mode.
+ modes.afterAfterBody.processDoctype = function(data) {
+     var inBody = modes.inBody;
+     inBody.processDoctype(data);
+ };
+
+ modes.afterAfterBody.startTagHtml = function(data, attributes) {
+     var inBody = modes.inBody;
+     inBody.startTagHtml(data, attributes);
+ };
+
+ // Any other start tag is an error; switch back to inBody and reprocess it.
+ modes.afterAfterBody.startTagOther = function(name, attributes, selfClosing) {
+     tree.parseError('unexpected-start-tag', {name: name});
+     tree.setInsertionMode('inBody');
+     var nextMode = tree.insertionMode;
+     nextMode.processStartTag(name, attributes, selfClosing);
+ };
+
+ // Any end tag is an error; switch back to inBody and reprocess it.
+ modes.afterAfterBody.endTagOther = function(name) {
+     tree.parseError('unexpected-end-tag', {name: name});
+     tree.setInsertionMode('inBody');
+     var nextMode = tree.insertionMode;
+     nextMode.processEndTag(name);
+ };
+
+ // Whitespace-only text is handed to inBody without changing mode. Anything
+ // else is an error that switches back to inBody before reprocessing.
+ modes.afterAfterBody.processCharacters = function(data) {
+     if (isAllWhitespace(data.characters)) {
+         modes.inBody.processCharacters(data);
+         return;
+     }
+     tree.parseError('unexpected-char-after-body');
+     tree.setInsertionMode('inBody');
+     return tree.insertionMode.processCharacters(data);
+ };
+
+ // "afterBody" insertion mode.
+ modes.afterBody = Object.create(modes.base);
+
+ modes.afterBody.end_tag_handlers = {
+     html: 'endTagHtml',
+     '-default': 'endTagOther'
+ };
+
+ // Overridden because the comment must go on the root (html) element here,
+ // not on whatever element is currently open.
+ modes.afterBody.processComment = function(data) {
+     var root = tree.openElements.rootNode;
+     tree.insertComment(data, root);
+ };
+
+ // Whitespace-only text is handed to inBody without changing mode. Anything
+ // else is an error that switches back to inBody before reprocessing.
+ modes.afterBody.processCharacters = function(data) {
+     if (isAllWhitespace(data.characters)) {
+         modes.inBody.processCharacters(data);
+         return;
+     }
+     tree.parseError('unexpected-char-after-body');
+     tree.setInsertionMode('inBody');
+     return tree.insertionMode.processCharacters(data);
+ };
+
+ // Every start tag is an error here; switch back to inBody and reprocess it.
+ modes.afterBody.processStartTag = function(name, attributes, selfClosing) {
+     tree.parseError('unexpected-start-tag-after-body', {name: name});
+     tree.setInsertionMode('inBody');
+     var nextMode = tree.insertionMode;
+     nextMode.processStartTag(name, attributes, selfClosing);
+ };
+
+ modes.afterBody.endTagHtml = function(name) {
+ if (tree.context) {
+ tree.parseError('end-html-in-innerhtml');
+ } else {
+ // XXX This may need to be done, not sure
+ // Don't set last_phase to the current phase but to the inBody phase
+ // instead. No need for extra parseErrors if there's something after
+ //