/**
 * Builds the background worker for an HTML edit session.
 *
 * A WorkerClient for the "ace/mode/html_worker" module is created and attached
 * to the session's document. When the mode was given a fragment context it is
 * forwarded to the worker through its "setOptions" call. The data of every
 * worker "error" event becomes the session's annotations, and the annotations
 * are cleared when the worker emits "terminate".
 *
 * @param {Object} session edit session; must provide getDocument(),
 *     setAnnotations() and clearAnnotations()
 * @return {WorkerClient} the attached worker
 */
this.createWorker = function(session) {
    var htmlWorker = new WorkerClient(["ace"], "ace/mode/html_worker", "Worker");

    function showAnnotations(event) {
        session.setAnnotations(event.data);
    }

    function dropAnnotations() {
        session.clearAnnotations();
    }

    htmlWorker.attachToDocument(session.getDocument());

    // Only configure the worker when a fragment context was supplied.
    var context = this.fragmentContext;
    if (context) {
        htmlWorker.call("setOptions", [{context: context}]);
    }

    htmlWorker.on("error", showAnnotations);
    htmlWorker.on("terminate", dropAnnotations);

    return htmlWorker;
};
/**
 * Removes the item from the element stack.
 *
 * Only the first occurrence is removed. An item that is not on the stack is
 * ignored and the stack is left untouched.
 *
 * @param {StackItem} item The item to remove
 */
ElementStack.prototype.remove = function(item) {
    var index = this.elements.indexOf(item);
    // indexOf() reports a missing item as -1, and splice(-1, 1) would delete
    // the topmost element instead, so a miss must be filtered out first.
    if (index !== -1)
        this.elements.splice(index, 1);
};
/**
 * Pops elements off the top of the stack until the callback accepts one.
 *
 * The callback is invoked with each popped element, topmost first. Popping
 * stops at the first element for which it returns a truthy value; that element
 * has already been removed from the stack and is returned.
 *
 * @param {function(StackItem): boolean} callback decides whether the element
 *     just popped is the last one to remove
 * @return {StackItem|undefined} the element accepted by the callback, or
 *     undefined when the stack ran out before any element was accepted
 */
ElementStack.prototype.remove_openElements_until = function(callback) {
    // Bounding the loop by the stack size keeps it from spinning forever
    // (popping undefined from an empty array) when nothing matches.
    while (this.elements.length > 0) {
        var element = this.elements.pop();
        if (callback(element))
            return element;
    }
    return undefined;
};
/**
 * Inclusive range test built on the relational operators, so it orders its
 * operands exactly as `>=` and `<=` do.
 */
function isWithin(c, low, high) {
    return c >= low && c <= high;
}

/** True when c lies in '0'-'9', 'a'-'z' or 'A'-'Z'. */
function isAlphaNumeric(c) {
    return isWithin(c, '0', '9') || isWithin(c, 'a', 'z') || isWithin(c, 'A', 'Z');
}

/** True when c lies in '0'-'9', 'a'-'f' or 'A'-'F'. */
function isHexDigit(c) {
    return isWithin(c, '0', '9') || isWithin(c, 'a', 'f') || isWithin(c, 'A', 'F');
}

/** True when c lies in '0'-'9'. */
function isDecimalDigit(c) {
    return isWithin(c, '0', '9');
}
/**
 * Classifies the number of a numeric character reference.
 *
 * consumeEntity() treats any truthy return value as a parse error and then
 * uses that value as the code point to emit, so there are three outcomes:
 *
 *  - a different code point: the reference is replaced (NUL, the
 *    Windows-1252 range 0x80-0x9F, surrogates, out-of-range numbers);
 *  - the same code point: the reference is kept but flagged (carriage return,
 *    control characters, noncharacters);
 *  - undefined: the number needs no special treatment.
 *
 * Note that the mapping 0x00 -> 0xFFFD is the only way NUL is handled; a
 * falsy 0 is never returned.
 *
 * @param {number} c code point parsed from the reference
 * @return {number|undefined} code point to emit together with a parse error,
 *     or undefined when c is unremarkable
 */
EntityParser.replaceEntityNumbers = function(c) {
    switch (c) {
        case 0x00: return 0xFFFD; // REPLACEMENT CHARACTER
        case 0x0D: return 0x000D; // CARRIAGE RETURN (was mis-keyed as 0x13 -> 0x0010)
        case 0x80: return 0x20AC; // EURO SIGN
        case 0x81: return 0x0081; // <control>
        case 0x82: return 0x201A; // SINGLE LOW-9 QUOTATION MARK
        case 0x83: return 0x0192; // LATIN SMALL LETTER F WITH HOOK
        case 0x84: return 0x201E; // DOUBLE LOW-9 QUOTATION MARK
        case 0x85: return 0x2026; // HORIZONTAL ELLIPSIS
        case 0x86: return 0x2020; // DAGGER
        case 0x87: return 0x2021; // DOUBLE DAGGER
        case 0x88: return 0x02C6; // MODIFIER LETTER CIRCUMFLEX ACCENT
        case 0x89: return 0x2030; // PER MILLE SIGN
        case 0x8A: return 0x0160; // LATIN CAPITAL LETTER S WITH CARON
        case 0x8B: return 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        case 0x8C: return 0x0152; // LATIN CAPITAL LIGATURE OE
        case 0x8D: return 0x008D; // <control>
        case 0x8E: return 0x017D; // LATIN CAPITAL LETTER Z WITH CARON
        case 0x8F: return 0x008F; // <control>
        case 0x90: return 0x0090; // <control>
        case 0x91: return 0x2018; // LEFT SINGLE QUOTATION MARK
        case 0x92: return 0x2019; // RIGHT SINGLE QUOTATION MARK
        case 0x93: return 0x201C; // LEFT DOUBLE QUOTATION MARK
        case 0x94: return 0x201D; // RIGHT DOUBLE QUOTATION MARK
        case 0x95: return 0x2022; // BULLET
        case 0x96: return 0x2013; // EN DASH
        case 0x97: return 0x2014; // EM DASH
        case 0x98: return 0x02DC; // SMALL TILDE
        case 0x99: return 0x2122; // TRADE MARK SIGN
        case 0x9A: return 0x0161; // LATIN SMALL LETTER S WITH CARON
        case 0x9B: return 0x203A; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        case 0x9C: return 0x0153; // LATIN SMALL LIGATURE OE
        case 0x9D: return 0x009D; // <control>
        case 0x9E: return 0x017E; // LATIN SMALL LETTER Z WITH CARON
        case 0x9F: return 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
    }

    // Surrogates and numbers beyond Unicode are replaced outright.
    // NOTE: the HTML spec says "> 0x10FFFF", but consumeEntity() only builds a
    // surrogate pair for code < 0x10FFFF, so letting 0x10FFFF through would
    // have String.fromCharCode() truncate it. The inclusive bound is kept
    // until both places are changed together.
    if ((c >= 0xD800 && c <= 0xDFFF) || c >= 0x10FFFF)
        return 0xFFFD;

    // Control characters and noncharacters are kept, but flagged as an error.
    // Every noncharacter U+nFFFE / U+nFFFF has its low 16 bits equal to
    // 0xFFFE or 0xFFFF, which is what the mask tests for.
    if ((c >= 0x0001 && c <= 0x0008) || (c >= 0x000E && c <= 0x001F) ||
        (c >= 0x007F && c <= 0x009F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
        c === 0x000B || (c & 0xFFFE) === 0xFFFE)
        return c;

    return undefined;
};
matchWhile: function(re) { + if(this.eof && this.start >= this.data.length ) return ''; + var r = new RegExp("^"+re+"+"); + var m = r.exec(this.slice()); + if(m) { + if(!this.eof && m[0].length == this.data.length - this.start) throw InputStream.DRAIN; + this.advance(m[0].length); + return m[0]; + } else { + return ''; + } + }, + matchUntil: function(re) { + var m, s; + s = this.slice(); + if(s === InputStream.EOF) { + return ''; + } else if(m = new RegExp(re + (this.eof ? "|$" : "")).exec(s)) { + var t = this.data.slice(this.start, this.start + m.index); + this.advance(m.index); + return t.toString(); + } else { + throw InputStream.DRAIN; + } + }, + append: function(data) { + this.data += data; + }, + shift: function(n) { + if(!this.eof && this.start + n >= this.data.length) throw InputStream.DRAIN; + if(this.eof && this.start >= this.data.length) return InputStream.EOF; + var d = this.data.slice(this.start, this.start + n).toString(); + this.advance(Math.min(n, this.data.length - this.start)); + return d; + }, + peek: function(n) { + if(!this.eof && this.start + n >= this.data.length) throw InputStream.DRAIN; + if(this.eof && this.start >= this.data.length) return InputStream.EOF; + return this.data.slice(this.start, Math.min(this.start + n, this.data.length)).toString(); + }, + length: function() { + return this.data.length - this.start - 1; + }, + unget: function(d) { + if(d === InputStream.EOF) return; + this.start -= (d.length); + }, + undo: function() { + this.start = this.committed; + }, + commit: function() { + this.committed = this.start; + }, + location: function() { + var lastLine = this.lastLocation.line; + var lastColumn = this.lastLocation.column; + var read = this.data.slice(0, this.committed); + var newlines = read.match(/\n/g); + var line = newlines ? lastLine + newlines.length : lastLine; + var column = newlines ? 
/**
 * Local names of the elements in the "special" category, keyed by namespace
 * URI.
 * http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
 */
var SpecialElements = {
	"http://www.w3.org/1999/xhtml": [
		'address', 'applet', 'area', 'article', 'aside',
		'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button',
		'caption', 'center', 'col', 'colgroup',
		'dd', 'details', 'dir', 'div', 'dl', 'dt',
		'embed',
		'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset',
		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html',
		'iframe', 'img', 'input', 'isindex',
		'li', 'link', 'listing',
		'main', 'marquee', 'menu', 'menuitem', 'meta',
		'nav', 'noembed', 'noframes', 'noscript',
		'object', 'ol',
		'p', 'param', 'plaintext', 'pre',
		'script', 'section', 'select', 'source', 'style', 'summary',
		'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track',
		'ul',
		'wbr',
		'xmp'
	],
	"http://www.w3.org/1998/Math/MathML": [
		'mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml'
	],
	"http://www.w3.org/2000/svg": [
		'foreignObject', 'desc', 'title'
	]
};

/**
 * Entry of the stack of open elements.
 *
 * @param {string} namespaceURI namespace of the element
 * @param {string} localName local name of the element
 * @param {Array} attributes attribute records with nodeName / nodeValue
 * @param {*} node the tree node this entry stands for
 * @constructor
 */
function StackItem(namespaceURI, localName, attributes, node) {
	this.localName = localName;
	this.namespaceURI = namespaceURI;
	this.attributes = attributes;
	this.node = node;
}

/**
 * Whether the element is listed in SpecialElements for its namespace.
 * http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
 * @return {boolean}
 */
StackItem.prototype.isSpecial = function() {
	// An own-property check is required here: the `in` operator also finds
	// inherited members such as "toString", whose value has no indexOf().
	return Object.prototype.hasOwnProperty.call(SpecialElements, this.namespaceURI) &&
		SpecialElements[this.namespaceURI].indexOf(this.localName) > -1;
};

/**
 * Whether this is an HTML table, tbody, tfoot, thead or tr element.
 * @return {boolean}
 */
StackItem.prototype.isFosterParenting = function() {
	if (this.namespaceURI === "http://www.w3.org/1999/xhtml") {
		return this.localName === 'table' ||
			this.localName === 'tbody' ||
			this.localName === 'tfoot' ||
			this.localName === 'thead' ||
			this.localName === 'tr';
	}
	return false;
};

/**
 * Whether this is an HTML h1-h6 element.
 * @return {boolean}
 */
StackItem.prototype.isNumberedHeader = function() {
	if (this.namespaceURI === "http://www.w3.org/1999/xhtml") {
		return this.localName === 'h1' ||
			this.localName === 'h2' ||
			this.localName === 'h3' ||
			this.localName === 'h4' ||
			this.localName === 'h5' ||
			this.localName === 'h6';
	}
	return false;
};

/**
 * Whether the element lives outside the HTML namespace.
 * @return {boolean}
 */
StackItem.prototype.isForeign = function() {
	return this.namespaceURI != "http://www.w3.org/1999/xhtml";
};

/**
 * Looks up an attribute value by exact name.
 *
 * @param {StackItem} item stack item to inspect
 * @param {string} name attribute name
 * @return {?string} value of the first attribute with that name, or null when
 *     there is none (including when the item carries no attribute list)
 */
function getAttribute(item, name) {
	var attributes = item.attributes || [];
	for (var i = 0; i < attributes.length; i++) {
		if (attributes[i].nodeName == name)
			return attributes[i].nodeValue;
	}
	return null;
}

/**
 * Whether the element is an HTML integration point: a MathML annotation-xml
 * whose `encoding` attribute is "text/html" or "application/xhtml+xml"
 * (compared case-insensitively), or an SVG foreignObject, desc or title.
 * @return {boolean}
 */
StackItem.prototype.isHtmlIntegrationPoint = function() {
	if (this.namespaceURI === "http://www.w3.org/1998/Math/MathML") {
		if (this.localName !== "annotation-xml")
			return false;
		var encoding = getAttribute(this, 'encoding');
		if (!encoding)
			return false;
		encoding = encoding.toLowerCase();
		return encoding === "text/html" || encoding === "application/xhtml+xml";
	}
	if (this.namespaceURI === "http://www.w3.org/2000/svg") {
		return this.localName === "foreignObject"
			|| this.localName === "desc"
			|| this.localName === "title";
	}
	return false;
};

/**
 * Whether the element is a MathML mi, mo, mn, ms or mtext element.
 * @return {boolean}
 */
StackItem.prototype.isMathMLTextIntegrationPoint = function() {
	if (this.namespaceURI === "http://www.w3.org/1998/Math/MathML") {
		return this.localName === "mi"
			|| this.localName === "mo"
			|| this.localName === "mn"
			|| this.localName === "ms"
			|| this.localName === "mtext";
	}
	return false;
};
/**
 * Tokenizer front end: owns the input stream and the current state, and hands
 * finished tokens and parse errors to a token handler.
 *
 * @param {Object} tokenHandler receiver providing parseError(), processToken()
 *     and isSelfClosingFlagAcknowledged()
 * @constructor
 */
function Tokenizer(tokenHandler) {
	this._tokenHandler = tokenHandler;
	this._state = Tokenizer.DATA;
	this._inputStream = new InputStream();
	this._currentToken = null;
	this._temporaryBuffer = '';
	this._additionalAllowedCharacter = '';
}

/**
 * Reports a parse error to the token handler.
 * @param {string} code error identifier
 * @param {Object=} args optional details for the error
 */
Tokenizer.prototype._parseError = function(code, args) {
	this._tokenHandler.parseError(code, args);
};

/**
 * Validates a token and passes it to the token handler.
 *
 * Start tags lose attribute entries without a nodeName; end tags that carry a
 * self-closing flag or attributes are reported. After the handler has
 * processed a self-closing start tag, an unacknowledged flag is reported too.
 *
 * @param {Object} token the token to emit
 */
Tokenizer.prototype._emitToken = function(token) {
	if (token.type === 'StartTag') {
		// The scan deliberately begins at the second entry, so the first
		// attribute is never dropped.
		// NOTE(review): presumably a nameless entry can only follow a named
		// one - confirm against the attribute-name handling.
		var position = 1;
		while (position < token.data.length) {
			if (token.data[position].nodeName)
				position++;
			else
				token.data.splice(position, 1);
		}
	} else if (token.type === 'EndTag') {
		if (token.selfClosing)
			this._parseError('self-closing-flag-on-end-tag');
		if (token.data.length !== 0)
			this._parseError('attributes-in-end-tag');
	}

	var handler = this._tokenHandler;
	handler.processToken(token);

	var unacknowledged = token.type === 'StartTag' && token.selfClosing &&
		!this._tokenHandler.isSelfClosingFlagAcknowledged();
	if (unacknowledged)
		this._parseError('non-void-element-with-trailing-solidus', {name: token.name});
};

/**
 * Switches back to the data state, then emits the token under construction.
 */
Tokenizer.prototype._emitCurrentToken = function() {
	this._state = Tokenizer.DATA;
	this._emitToken(this._currentToken);
};

/**
 * @return {Object} the last attribute entry of the current token
 */
Tokenizer.prototype._currentAttribute = function() {
	var attributes = this._currentToken.data;
	return attributes[attributes.length - 1];
};

/**
 * Selects the state function to run next.
 * @param {Function} state
 */
Tokenizer.prototype.setState = function(state) {
	this._state = state;
};
buffer.char(); + if (data === InputStream.EOF) { + tokenizer._emitToken({type: 'EOF', data: null}); + return false; + } else if (data === '&') { + tokenizer.setState(character_reference_in_data_state); + } else if (data === '<') { + tokenizer.setState(tag_open_state); + } else if (data === '\u0000') { + tokenizer._emitToken({type: 'Characters', data: data}); + buffer.commit(); + } else { + var chars = buffer.matchUntil("&|<|\u0000"); + tokenizer._emitToken({type: 'Characters', data: data + chars}); + buffer.commit(); + } + return true; + } + + function character_reference_in_data_state(buffer) { + var character = EntityParser.consumeEntity(buffer, tokenizer); + tokenizer.setState(data_state); + tokenizer._emitToken({type: 'Characters', data: character || '&'}); + return true; + } + + function rcdata_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._emitToken({type: 'EOF', data: null}); + return false; + } else if (data === '&') { + tokenizer.setState(character_reference_in_rcdata_state); + } else if (data === '<') { + tokenizer.setState(rcdata_less_than_sign_state); + } else if (data === "\u0000") { + tokenizer._parseError("invalid-codepoint"); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + buffer.commit(); + } else { + var chars = buffer.matchUntil("&|<|\u0000"); + tokenizer._emitToken({type: 'Characters', data: data + chars}); + buffer.commit(); + } + return true; + } + + function character_reference_in_rcdata_state(buffer) { + var character = EntityParser.consumeEntity(buffer, tokenizer); + tokenizer.setState(rcdata_state); + tokenizer._emitToken({type: 'Characters', data: character || '&'}); + return true; + } + + function rawtext_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._emitToken({type: 'EOF', data: null}); + return false; + } else if (data === '<') { + tokenizer.setState(rawtext_less_than_sign_state); + } else if (data === "\u0000") { + 
tokenizer._parseError("invalid-codepoint"); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + buffer.commit(); + } else { + var chars = buffer.matchUntil("<|\u0000"); + tokenizer._emitToken({type: 'Characters', data: data + chars}); + } + return true; + } + + function plaintext_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._emitToken({type: 'EOF', data: null}); + return false; + } else if (data === "\u0000") { + tokenizer._parseError("invalid-codepoint"); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + buffer.commit(); + } else { + var chars = buffer.matchUntil("\u0000"); + tokenizer._emitToken({type: 'Characters', data: data + chars}); + } + return true; + } + + + function script_data_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._emitToken({type: 'EOF', data: null}); + return false; + } else if (data === '<') { + tokenizer.setState(script_data_less_than_sign_state); + } else if (data === '\u0000') { + tokenizer._parseError("invalid-codepoint"); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + buffer.commit(); + } else { + var chars = buffer.matchUntil("<|\u0000"); + tokenizer._emitToken({type: 'Characters', data: data + chars}); + } + return true; + } + + function rcdata_less_than_sign_state(buffer) { + var data = buffer.char(); + if (data === "/") { + this._temporaryBuffer = ''; + tokenizer.setState(rcdata_end_tag_open_state); + } else { + tokenizer._emitToken({type: 'Characters', data: '<'}); + buffer.unget(data); + tokenizer.setState(rcdata_state); + } + return true; + } + + function rcdata_end_tag_open_state(buffer) { + var data = buffer.char(); + if (isAlpha(data)) { + this._temporaryBuffer += data; + tokenizer.setState(rcdata_end_tag_name_state); + } else { + tokenizer._emitToken({type: 'Characters', data: '' && appropriate) { + tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: 
false}; + tokenizer._emitCurrentToken(); + tokenizer.setState(data_state); + } else if (isAlpha(data)) { + this._temporaryBuffer += data; + buffer.commit(); + } else { + tokenizer._emitToken({type: 'Characters', data: '' && appropriate) { + tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; + tokenizer._emitCurrentToken(); + tokenizer.setState(data_state); + } else if (isAlpha(data)) { + this._temporaryBuffer += data; + buffer.commit(); + } else { + tokenizer._emitToken({type: 'Characters', data: '' && appropriate) { + tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; + tokenizer._emitCurrentToken(); + } else if (isAlpha(data)) { + this._temporaryBuffer += data; + buffer.commit(); + } else { + tokenizer._emitToken({type: 'Characters', data: '') { + tokenizer._emitToken({type: 'Characters', data: '>'}); + tokenizer.setState(script_data_state); + } else if (data === '\u0000') { + tokenizer._parseError("invalid-codepoint"); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + tokenizer.setState(script_data_escaped_state); + } else { + tokenizer._emitToken({type: 'Characters', data: data}); + tokenizer.setState(script_data_escaped_state); + } + return true; + } + + function script_data_escaped_less_then_sign_state(buffer) { + var data = buffer.char(); + if (data === '/') { + this._temporaryBuffer = ''; + tokenizer.setState(script_data_escaped_end_tag_open_state); + } else if (isAlpha(data)) { + tokenizer._emitToken({type: 'Characters', data: '<' + data}); + this._temporaryBuffer = data; + tokenizer.setState(script_data_double_escape_start_state); + } else { + tokenizer._emitToken({type: 'Characters', data: '<'}); + buffer.unget(data); + tokenizer.setState(script_data_escaped_state); + } + return true; + } + + function script_data_escaped_end_tag_open_state(buffer) { + var data = buffer.char(); + if (isAlpha(data)) { + this._temporaryBuffer = data; + 
tokenizer.setState(script_data_escaped_end_tag_name_state); + } else { + tokenizer._emitToken({type: 'Characters', data: '' && appropriate) { + tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; + tokenizer.setState(data_state); + tokenizer._emitCurrentToken(); + } else if (isAlpha(data)) { + this._temporaryBuffer += data; + buffer.commit(); + } else { + tokenizer._emitToken({type: 'Characters', data: '') { + tokenizer._emitToken({type: 'Characters', data: data}); + if (this._temporaryBuffer.toLowerCase() === 'script') + tokenizer.setState(script_data_double_escaped_state); + else + tokenizer.setState(script_data_escaped_state); + } else if (isAlpha(data)) { + tokenizer._emitToken({type: 'Characters', data: data}); + this._temporaryBuffer += data; + buffer.commit(); + } else { + buffer.unget(data); + tokenizer.setState(script_data_escaped_state); + } + return true; + } + + function script_data_double_escaped_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._parseError('eof-in-script'); + buffer.unget(data); + tokenizer.setState(data_state); + } else if (data === '-') { + tokenizer._emitToken({type: 'Characters', data: '-'}); + tokenizer.setState(script_data_double_escaped_dash_state); + } else if (data === '<') { + tokenizer._emitToken({type: 'Characters', data: '<'}); + tokenizer.setState(script_data_double_escaped_less_than_sign_state); + } else if (data === '\u0000') { + tokenizer._parseError('invalid-codepoint'); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + buffer.commit(); + } else { + tokenizer._emitToken({type: 'Characters', data: data}); + buffer.commit(); + } + return true; + } + + function script_data_double_escaped_dash_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._parseError('eof-in-script'); + buffer.unget(data); + tokenizer.setState(data_state); + } else if (data === '-') { + tokenizer._emitToken({type: 
'Characters', data: '-'}); + tokenizer.setState(script_data_double_escaped_dash_dash_state); + } else if (data === '<') { + tokenizer._emitToken({type: 'Characters', data: '<'}); + tokenizer.setState(script_data_double_escaped_less_than_sign_state); + } else if (data === '\u0000') { + tokenizer._parseError('invalid-codepoint'); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + tokenizer.setState(script_data_double_escaped_state); + } else { + tokenizer._emitToken({type: 'Characters', data: data}); + tokenizer.setState(script_data_double_escaped_state); + } + return true; + } + + function script_data_double_escaped_dash_dash_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._parseError('eof-in-script'); + buffer.unget(data); + tokenizer.setState(data_state); + } else if (data === '-') { + tokenizer._emitToken({type: 'Characters', data: '-'}); + buffer.commit(); + } else if (data === '<') { + tokenizer._emitToken({type: 'Characters', data: '<'}); + tokenizer.setState(script_data_double_escaped_less_than_sign_state); + } else if (data === '>') { + tokenizer._emitToken({type: 'Characters', data: '>'}); + tokenizer.setState(script_data_state); + } else if (data === '\u0000') { + tokenizer._parseError('invalid-codepoint'); + tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); + tokenizer.setState(script_data_double_escaped_state); + } else { + tokenizer._emitToken({type: 'Characters', data: data}); + tokenizer.setState(script_data_double_escaped_state); + } + return true; + } + + function script_data_double_escaped_less_than_sign_state(buffer) { + var data = buffer.char(); + if (data === '/') { + tokenizer._emitToken({type: 'Characters', data: '/'}); + this._temporaryBuffer = ''; + tokenizer.setState(script_data_double_escape_end_state); + } else { + buffer.unget(data); + tokenizer.setState(script_data_double_escaped_state); + } + return true; + } + + function script_data_double_escape_end_state(buffer) { + 
var data = buffer.char(); + if (isWhitespace(data) || data === '/' || data === '>') { + tokenizer._emitToken({type: 'Characters', data: data}); + if (this._temporaryBuffer.toLowerCase() === 'script') + tokenizer.setState(script_data_escaped_state); + else + tokenizer.setState(script_data_double_escaped_state); + } else if (isAlpha(data)) { + tokenizer._emitToken({type: 'Characters', data: data}); + this._temporaryBuffer += data; + buffer.commit(); + } else { + buffer.unget(data); + tokenizer.setState(script_data_double_escaped_state); + } + return true; + } + + function tag_open_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._parseError("bare-less-than-sign-at-eof"); + tokenizer._emitToken({type: 'Characters', data: '<'}); + buffer.unget(data); + tokenizer.setState(data_state); + } else if (isAlpha(data)) { + tokenizer._currentToken = {type: 'StartTag', name: data.toLowerCase(), data: []}; + tokenizer.setState(tag_name_state); + } else if (data === '!') { + tokenizer.setState(markup_declaration_open_state); + } else if (data === '/') { + tokenizer.setState(close_tag_open_state); + } else if (data === '>') { + // XXX In theory it could be something besides a tag name. But + // do we really care? + tokenizer._parseError("expected-tag-name-but-got-right-bracket"); + tokenizer._emitToken({type: 'Characters', data: "<>"}); + tokenizer.setState(data_state); + } else if (data === '?') { + // XXX In theory it could be something besides a tag name. But + // do we really care? 
/**
 * Tokenizer state handler: "tag name".
 *
 * Consumes one character and either leaves the state (EOF, whitespace,
 * ">" or "/") or appends to the current token's name: letters are
 * lower-cased, NUL is reported and stored as U+FFFD, anything else is
 * appended unchanged. The buffer is committed on every path.
 *
 * @param {Object} buffer input buffer exposing char(), unget() and commit()
 * @return {boolean} always true
 */
function tag_name_state(buffer) {
    var ch = buffer.char();
    var appendToName = false;
    var suffix;

    if (ch === InputStream.EOF) {
        tokenizer._parseError('eof-in-tag-name');
        buffer.unget(ch);
        tokenizer.setState(data_state);
    } else if (isWhitespace(ch)) {
        tokenizer.setState(before_attribute_name_state);
    } else if (isAlpha(ch)) {
        suffix = ch.toLowerCase();
        appendToName = true;
    } else if (ch === '>') {
        tokenizer._emitCurrentToken();
    } else if (ch === '/') {
        tokenizer.setState(self_closing_tag_state);
    } else if (ch === '\u0000') {
        tokenizer._parseError("invalid-codepoint");
        suffix = "\uFFFD";
        appendToName = true;
    } else {
        suffix = ch;
        appendToName = true;
    }

    if (appendToName)
        tokenizer._currentToken.name += suffix;
    buffer.commit();

    return true;
}
tokenizer.setState(attribute_name_state); + } else if (data === '>') { + tokenizer._emitCurrentToken(); + } else if (data === '/') { + tokenizer.setState(self_closing_tag_state); + } else if (data === "'" || data === '"' || data === '=' || data === '<') { + tokenizer._parseError("invalid-character-in-attribute-name"); + tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); + tokenizer.setState(attribute_name_state); + } else if (data === '\u0000') { + tokenizer._parseError("invalid-codepoint"); + tokenizer._currentToken.data.push({nodeName: "\uFFFD", nodeValue: ""}); + } else { + tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); + tokenizer.setState(attribute_name_state); + } + return true; + } + + function attribute_name_state(buffer) { + var data = buffer.char(); + var leavingThisState = true; + var shouldEmit = false; + if (data === InputStream.EOF) { + tokenizer._parseError("eof-in-attribute-name"); + buffer.unget(data); + tokenizer.setState(data_state); + shouldEmit = true; + } else if (data === '=') { + tokenizer.setState(before_attribute_value_state); + } else if (isAlpha(data)) { + tokenizer._currentAttribute().nodeName += data.toLowerCase(); + leavingThisState = false; + } else if (data === '>') { + // XXX If we emit here the attributes are converted to a dict + // without being checked and when the code below runs we error + // because data is a dict not a list + shouldEmit = true; + } else if (isWhitespace(data)) { + tokenizer.setState(after_attribute_name_state); + } else if (data === '/') { + tokenizer.setState(self_closing_tag_state); + } else if (data === "'" || data === '"') { + tokenizer._parseError("invalid-character-in-attribute-name"); + tokenizer._currentAttribute().nodeName += data; + leavingThisState = false; + } else if (data === '\u0000') { + tokenizer._parseError("invalid-codepoint"); + tokenizer._currentAttribute().nodeName += "\uFFFD"; + } else { + tokenizer._currentAttribute().nodeName += data; + 
/**
 * Tokenizer state handler: "after attribute name".
 *
 * Consumes one character following an attribute name (and any whitespace
 * after it). Whitespace is skipped, "=" moves on to the attribute value,
 * ">" emits the current tag token, "/" goes to the self-closing state and
 * every other character starts a NEW attribute on the current token and
 * switches to the attribute name state.
 *
 * Fixes relative to the previous revision:
 *  - a letter that starts a new attribute is lower-cased, matching what
 *    before_attribute_name_state and attribute_name_state do;
 *  - after a NUL character (stored as U+FFFD) the tokenizer now switches to
 *    the attribute name state, so following characters extend that
 *    attribute instead of each opening another one.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function after_attribute_name_state(buffer) {
    var data = buffer.char();
    if (data === InputStream.EOF) {
        tokenizer._parseError("expected-end-of-tag-but-got-eof");
        buffer.unget(data);
        tokenizer.setState(data_state);
    } else if (isWhitespace(data)) {
        // Stay in this state and ignore the whitespace.
        return true;
    } else if (data === '=') {
        tokenizer.setState(before_attribute_value_state);
    } else if (data === '>') {
        tokenizer._emitCurrentToken();
    } else if (isAlpha(data)) {
        // Attribute names are stored lower-cased.
        tokenizer._currentToken.data.push({nodeName: data.toLowerCase(), nodeValue: ""});
        tokenizer.setState(attribute_name_state);
    } else if (data === '/') {
        tokenizer.setState(self_closing_tag_state);
    } else if (data === "'" || data === '"' || data === '<') {
        // Reported, but still treated as the first character of a name.
        tokenizer._parseError("invalid-character-after-attribute-name");
        tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
        tokenizer.setState(attribute_name_state);
    } else if (data === '\u0000') {
        tokenizer._parseError("invalid-codepoint");
        tokenizer._currentToken.data.push({nodeName: "\uFFFD", nodeValue: ""});
        // The replacement character is the start of an attribute name.
        tokenizer.setState(attribute_name_state);
    } else {
        tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""});
        tokenizer.setState(attribute_name_state);
    }
    return true;
}
/**
 * Tokenizer state handler: "attribute value (double-quoted)".
 *
 * Consumes one character. The closing quote moves to the after-value
 * state; "&" records '"' as the additional allowed character on `this` and
 * enters the character-reference state; NUL is reported and stored as
 * U+FFFD. Any other character is appended to the current attribute's value
 * together with everything buffer.matchUntil() returns for the pattern
 * below.
 *
 * @param {Object} buffer input buffer exposing char(), unget(), matchUntil()
 * @return {boolean} always true
 */
function attribute_value_double_quoted_state(buffer) {
    var ch = buffer.char();
    switch (ch) {
    case InputStream.EOF:
        tokenizer._parseError("eof-in-attribute-value-double-quote");
        buffer.unget(ch);
        tokenizer.setState(data_state);
        break;
    case '"':
        tokenizer.setState(after_attribute_value_state);
        break;
    case '&':
        this._additionalAllowedCharacter = '"';
        tokenizer.setState(character_reference_in_attribute_value_state);
        break;
    case '\u0000':
        tokenizer._parseError("invalid-codepoint");
        tokenizer._currentAttribute().nodeValue += "\uFFFD";
        break;
    default:
        var rest = buffer.matchUntil('[\0"&]');
        tokenizer._currentAttribute().nodeValue += ch + rest;
    }
    return true;
}
+ tokenizer._parseError("eof-in-attribute-value-single-quote"); + buffer.unget(data); + tokenizer.setState(data_state); + } else if (data === "'") { + tokenizer.setState(after_attribute_value_state); + } else if (data === '&') { + this._additionalAllowedCharacter = "'"; + tokenizer.setState(character_reference_in_attribute_value_state); + } else if (data === '\u0000') { + tokenizer._parseError("invalid-codepoint"); + tokenizer._currentAttribute().nodeValue += "\uFFFD"; + } else { + tokenizer._currentAttribute().nodeValue += data + buffer.matchUntil("\u0000|['&]"); + } + return true; + } + + function attribute_value_unquoted_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._parseError("eof-after-attribute-value"); + buffer.unget(data); + tokenizer.setState(data_state); + } else if (isWhitespace(data)) { + tokenizer.setState(before_attribute_name_state); + } else if (data === '&') { + this._additionalAllowedCharacter = ">"; + tokenizer.setState(character_reference_in_attribute_value_state); + } else if (data === '>') { + tokenizer._emitCurrentToken(); + } else if (data === '"' || data === "'" || data === '=' || data === '`' || data === '<') { + tokenizer._parseError("unexpected-character-in-unquoted-attribute-value"); + tokenizer._currentAttribute().nodeValue += data; + buffer.commit(); + } else if (data === '\u0000') { + tokenizer._parseError("invalid-codepoint"); + tokenizer._currentAttribute().nodeValue += "\uFFFD"; + } else { + var o = buffer.matchUntil("\u0000|["+ "\t\n\v\f\x20\r" + "&<>\"'=`" +"]"); + if (o === InputStream.EOF) { + tokenizer._parseError("eof-in-attribute-value-no-quotes"); + tokenizer._emitCurrentToken(); + } + // Commit here since this state is re-enterable and its outcome won't change with more data. 
/**
 * Tokenizer state handler: "character reference in attribute value".
 *
 * Asks EntityParser.consumeEntity() for a character reference and appends
 * the result (or a literal "&" when it yields nothing truthy) to the
 * current attribute's value. The attribute value state to return to is not
 * stored explicitly; it is derived from the additional allowed character
 * that the originating state recorded on `this`.
 *
 * @param {Object} buffer input buffer passed through to the entity parser
 * @return {boolean} always true
 */
function character_reference_in_attribute_value_state(buffer) {
    var entity = EntityParser.consumeEntity(buffer, tokenizer, this._additionalAllowedCharacter);
    this._currentAttribute().nodeValue += entity || '&';

    switch (this._additionalAllowedCharacter) {
    case '"':
        tokenizer.setState(attribute_value_double_quoted_state);
        break;
    case '\'':
        tokenizer.setState(attribute_value_single_quoted_state);
        break;
    case '>':
        tokenizer.setState(attribute_value_unquoted_state);
        break;
    }
    return true;
}

/**
 * Tokenizer state handler: "after attribute value (quoted)".
 *
 * Consumes the character following a quoted attribute value: whitespace
 * leads to the next attribute name, ">" emits the tag, "/" enters the
 * self-closing state. Anything else is reported, pushed back and
 * re-processed by the before-attribute-name state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function after_attribute_value_state(buffer) {
    var ch = buffer.char();

    if (ch === InputStream.EOF) {
        tokenizer._parseError("eof-after-attribute-value");
        buffer.unget(ch);
        tokenizer.setState(data_state);
        return true;
    }
    if (isWhitespace(ch)) {
        tokenizer.setState(before_attribute_name_state);
        return true;
    }
    if (ch === '>') {
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    if (ch === '/') {
        tokenizer.setState(self_closing_tag_state);
        return true;
    }

    tokenizer._parseError("unexpected-character-after-attribute-value");
    buffer.unget(ch);
    tokenizer.setState(before_attribute_name_state);
    return true;
}
/**
 * Tokenizer state handler: "bogus comment".
 *
 * Takes everything buffer.matchUntil('>') returns, replaces NUL characters
 * with U+FFFD, consumes one more character, emits the text as a 'Comment'
 * token and returns to the data state.
 *
 * @param {Object} buffer input buffer exposing matchUntil() and char()
 * @return {boolean} always true
 */
function bogus_comment_state(buffer) {
    var text = buffer.matchUntil('>').replace(/\u0000/g, "\uFFFD");
    buffer.char();
    tokenizer._emitToken({type: 'Comment', data: text});
    tokenizer.setState(data_state);
    return true;
}

/**
 * Tokenizer state handler: "markup declaration open" (input after "<!").
 *
 * Shifts two characters; "--" opens a comment token. Otherwise five more
 * characters are shifted and the seven together select a DOCTYPE token
 * (compared case-insensitively) or, when the token handler allows it, a
 * CDATA section (compared exactly). Everything else is reported and handed
 * to the bogus comment state with the shifted characters pushed back.
 *
 * @param {Object} buffer input buffer exposing shift() and unget()
 * @return {boolean} always true
 */
function markup_declaration_open_state(buffer) {
    var lead = buffer.shift(2);
    if (lead === '--') {
        tokenizer._currentToken = {type: 'Comment', data: ''};
        tokenizer.setState(comment_start_state);
        return true;
    }

    var tail = buffer.shift(5);
    if (tail === InputStream.EOF || lead === InputStream.EOF) {
        tokenizer._parseError("expected-dashes-or-doctype");
        tokenizer.setState(bogus_comment_state);
        buffer.unget(lead);
        return true;
    }

    lead += tail;
    if (lead.toUpperCase() === 'DOCTYPE') {
        tokenizer._currentToken = {type: 'Doctype', name: '', publicId: null, systemId: null, forceQuirks: false};
        tokenizer.setState(doctype_state);
        return true;
    }
    if (tokenizer._tokenHandler.isCdataSectionAllowed() && lead === '[CDATA[') {
        tokenizer.setState(cdata_section_state);
        return true;
    }

    tokenizer._parseError("expected-dashes-or-doctype");
    buffer.unget(lead);
    tokenizer.setState(bogus_comment_state);
    return true;
}

/**
 * Tokenizer state handler: "CDATA section".
 *
 * Takes the text buffer.matchUntil(']]>') returns, shifts three characters
 * (the terminator), emits the text as a 'Characters' token when it is
 * non-empty and returns to the data state.
 *
 * @param {Object} buffer input buffer exposing matchUntil() and shift()
 * @return {boolean} always true
 */
function cdata_section_state(buffer) {
    var text = buffer.matchUntil(']]>');
    // drop the "]]>" terminator
    buffer.shift(3);
    if (text)
        tokenizer._emitToken({type: 'Characters', data: text});
    tokenizer.setState(data_state);
    return true;
}
/**
 * Tokenizer state handler: "comment start dash" (input after "<!---").
 *
 * Consumes one character. A second "-" moves to the comment end state, ">"
 * is reported and ends the (empty) comment, EOF is reported and ends the
 * comment while handing EOF back to the data state. Any other character
 * means the pending "-" was comment text: it is appended together with the
 * character and the comment state is entered.
 *
 * Fix relative to the previous revision: the NUL branch now behaves like
 * the generic branch (and like comment_end_dash_state) - it appends the
 * pending "-" before U+FFFD and switches to the comment state. Previously
 * the "-" was lost and the tokenizer stayed in this state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function comment_start_dash_state(buffer) {
    var data = buffer.char();
    if (data === InputStream.EOF) {
        tokenizer._parseError("eof-in-comment");
        tokenizer._emitToken(tokenizer._currentToken);
        buffer.unget(data);
        tokenizer.setState(data_state);
    } else if (data === '-') {
        tokenizer.setState(comment_end_state);
    } else if (data === '>') {
        tokenizer._parseError("incorrect-comment");
        tokenizer._emitToken(tokenizer._currentToken);
        tokenizer.setState(data_state);
    } else if (data === '\u0000') {
        tokenizer._parseError("invalid-codepoint");
        // Keep the "-" that brought us here, then the replacement character.
        tokenizer._currentToken.data += "-\uFFFD";
        tokenizer.setState(comment_state);
    } else {
        tokenizer._currentToken.data += '-' + data;
        tokenizer.setState(comment_state);
    }
    return true;
}

/**
 * Tokenizer state handler: "comment".
 *
 * Consumes one character of comment text. "-" moves to the comment end
 * dash state; NUL is reported and stored as U+FFFD; EOF is reported, the
 * comment token is emitted and EOF is handed back to the data state. Any
 * other character is appended to the comment and the buffer is committed.
 *
 * @param {Object} buffer input buffer exposing char(), unget() and commit()
 * @return {boolean} always true
 */
function comment_state(buffer) {
    var data = buffer.char();
    if (data === InputStream.EOF) {
        tokenizer._parseError("eof-in-comment");
        tokenizer._emitToken(tokenizer._currentToken);
        buffer.unget(data);
        tokenizer.setState(data_state);
    } else if (data === '-') {
        tokenizer.setState(comment_end_dash_state);
    } else if (data === '\u0000') {
        tokenizer._parseError("invalid-codepoint");
        tokenizer._currentToken.data += "\uFFFD";
    } else {
        tokenizer._currentToken.data += data;
        buffer.commit();
    }
    return true;
}
/**
 * Tokenizer state handler: "comment end" (input after "--" inside a comment).
 *
 * Consumes one character. ">" emits the comment; "!" is reported and moves
 * to the comment end bang state; a further "-" is reported and appended.
 * Any other character (NUL as U+FFFD) is reported, appended after the two
 * pending dashes, and the comment state resumes. EOF is reported, the
 * comment is emitted and EOF is handed back to the data state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function comment_end_state(buffer) {
    var ch = buffer.char();
    switch (ch) {
    case InputStream.EOF:
        tokenizer._parseError("eof-in-comment-double-dash");
        tokenizer._emitToken(tokenizer._currentToken);
        buffer.unget(ch);
        tokenizer.setState(data_state);
        break;
    case '>':
        tokenizer._emitToken(tokenizer._currentToken);
        tokenizer.setState(data_state);
        break;
    case '!':
        tokenizer._parseError("unexpected-bang-after-double-dash-in-comment");
        tokenizer.setState(comment_end_bang_state);
        break;
    case '-':
        tokenizer._parseError("unexpected-dash-after-double-dash-in-comment");
        tokenizer._currentToken.data += ch;
        break;
    case '\u0000':
        tokenizer._parseError("invalid-codepoint");
        tokenizer._currentToken.data += "--\uFFFD";
        tokenizer.setState(comment_state);
        break;
    default:
        // XXX
        tokenizer._parseError("unexpected-char-in-comment");
        tokenizer._currentToken.data += '--' + ch;
        tokenizer.setState(comment_state);
    }
    return true;
}
/**
 * Tokenizer state handler: "DOCTYPE".
 *
 * Expects whitespace after the DOCTYPE keyword. A non-whitespace character
 * is reported and pushed back; in both cases the before-DOCTYPE-name state
 * follows. EOF is reported, flags the token as force-quirks, hands EOF back
 * to the data state and emits the token.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function doctype_state(buffer) {
    var ch = buffer.char();
    if (ch === InputStream.EOF) {
        tokenizer._parseError("expected-doctype-name-but-got-eof");
        tokenizer._currentToken.forceQuirks = true;
        buffer.unget(ch);
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    if (!isWhitespace(ch)) {
        tokenizer._parseError("need-space-after-doctype");
        buffer.unget(ch);
    }
    tokenizer.setState(before_doctype_name_state);
    return true;
}

/**
 * Tokenizer state handler: "before DOCTYPE name".
 *
 * Skips whitespace. The first other character becomes the start of the
 * DOCTYPE name (letters lower-cased). ">" and EOF are reported, flag the
 * token as force-quirks and emit it from the data state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function before_doctype_name_state(buffer) {
    var ch = buffer.char();
    if (ch === InputStream.EOF) {
        tokenizer._parseError("expected-doctype-name-but-got-eof");
        tokenizer._currentToken.forceQuirks = true;
        buffer.unget(ch);
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    if (isWhitespace(ch)) {
        // nothing to do, remain in this state
        return true;
    }
    if (ch === '>') {
        tokenizer._parseError("expected-doctype-name-but-got-right-bracket");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    tokenizer._currentToken.name = isAlpha(ch) ? ch.toLowerCase() : ch;
    tokenizer.setState(doctype_name_state);
    return true;
}

/**
 * Tokenizer state handler: "DOCTYPE name".
 *
 * Appends characters (letters lower-cased) to the DOCTYPE name and commits
 * the buffer. Whitespace ends the name, ">" emits the token from the data
 * state, EOF additionally flags force-quirks and is reported.
 *
 * @param {Object} buffer input buffer exposing char(), unget() and commit()
 * @return {boolean} always true
 */
function doctype_name_state(buffer) {
    var ch = buffer.char();
    if (ch === InputStream.EOF) {
        tokenizer._currentToken.forceQuirks = true;
        buffer.unget(ch);
        tokenizer._parseError("eof-in-doctype-name");
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    if (isWhitespace(ch)) {
        tokenizer.setState(after_doctype_name_state);
        return true;
    }
    if (ch === '>') {
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    tokenizer._currentToken.name += isAlpha(ch) ? ch.toLowerCase() : ch;
    buffer.commit();
    return true;
}
after_doctype_name_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._currentToken.forceQuirks = true; + buffer.unget(data); + tokenizer._parseError("eof-in-doctype"); + tokenizer.setState(data_state); + tokenizer._emitCurrentToken(); + } else if (isWhitespace(data)) { + // pass + } else if (data === '>') { + tokenizer.setState(data_state); + tokenizer._emitCurrentToken(); + } else { + if (['p', 'P'].indexOf(data) > -1) { + var expected = [['u', 'U'], ['b', 'B'], ['l', 'L'], ['i', 'I'], ['c', 'C']]; + var matched = expected.every(function(expected){ + data = buffer.char(); + return expected.indexOf(data) > -1; + }); + if (matched) { + tokenizer.setState(after_doctype_public_keyword_state); + return true; + } + } else if (['s', 'S'].indexOf(data) > -1) { + var expected = [['y', 'Y'], ['s', 'S'], ['t', 'T'], ['e', 'E'], ['m', 'M']]; + var matched = expected.every(function(expected){ + data = buffer.char(); + return expected.indexOf(data) > -1; + }); + if (matched) { + tokenizer.setState(after_doctype_system_keyword_state); + return true; + } + } + + // All the characters read before the current 'data' will be + // [a-zA-Z], so they're garbage in the bogus doctype and can be + // discarded; only the latest character might be '>' or EOF + // and needs to be ungetted + buffer.unget(data); + tokenizer._currentToken.forceQuirks = true; + + if (data === InputStream.EOF) { + tokenizer._parseError("eof-in-doctype"); + buffer.unget(data); + tokenizer.setState(data_state); + tokenizer._emitCurrentToken(); + } else { + tokenizer._parseError("expected-space-or-right-bracket-in-doctype", {data: data}); + tokenizer.setState(bogus_doctype_state); + } + } + return true; + } + + function after_doctype_public_keyword_state(buffer) { + var data = buffer.char(); + if (data === InputStream.EOF) { + tokenizer._parseError("eof-in-doctype"); + tokenizer._currentToken.forceQuirks = true; + buffer.unget(data); + tokenizer.setState(data_state); + 
/**
 * Tokenizer state handler: "before DOCTYPE public identifier".
 *
 * Skips whitespace. A quote character initialises the token's publicId to
 * the empty string and enters the matching quoted-identifier state. ">",
 * EOF and any other character are reported and flag the token as
 * force-quirks; ">" and EOF emit it from the data state, other characters
 * lead to the bogus DOCTYPE state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function before_doctype_public_identifier_state(buffer) {
    var ch = buffer.char();
    if (ch === InputStream.EOF) {
        tokenizer._parseError("eof-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        buffer.unget(ch);
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }
    if (isWhitespace(ch)) {
        // nothing to do, remain in this state
        return true;
    }
    switch (ch) {
    case '"':
        tokenizer._currentToken.publicId = '';
        tokenizer.setState(doctype_public_identifier_double_quoted_state);
        break;
    case "'":
        tokenizer._currentToken.publicId = '';
        tokenizer.setState(doctype_public_identifier_single_quoted_state);
        break;
    case '>':
        tokenizer._parseError("unexpected-end-of-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        break;
    default:
        tokenizer._parseError("unexpected-char-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer.setState(bogus_doctype_state);
    }
    return true;
}
/**
 * Tokenizer state handler: "DOCTYPE public identifier (single-quoted)".
 *
 * Appends characters to the token's publicId until the closing "'". A ">"
 * or EOF inside the identifier is reported, flags force-quirks and emits
 * the token from the data state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function doctype_public_identifier_single_quoted_state(buffer) {
    var ch = buffer.char();
    switch (ch) {
    case InputStream.EOF:
        tokenizer._parseError("eof-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        buffer.unget(ch);
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        break;
    case "'":
        tokenizer.setState(after_doctype_public_identifier_state);
        break;
    case '>':
        tokenizer._parseError("unexpected-end-of-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        break;
    default:
        tokenizer._currentToken.publicId += ch;
    }
    return true;
}

/**
 * Tokenizer state handler: "after DOCTYPE public identifier".
 *
 * Whitespace moves to the between-identifiers state and ">" emits the
 * token. A quote directly after the public identifier is reported but
 * still starts an (empty) systemId in the matching quoted state. Other
 * characters are reported, flag force-quirks and go to the bogus DOCTYPE
 * state; EOF is reported, flags force-quirks and emits the token.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function after_doctype_public_identifier_state(buffer) {
    var ch = buffer.char();
    if (ch === InputStream.EOF) {
        tokenizer._parseError("eof-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer._emitCurrentToken();
        buffer.unget(ch);
        tokenizer.setState(data_state);
        return true;
    }
    if (isWhitespace(ch)) {
        tokenizer.setState(between_doctype_public_and_system_identifiers_state);
        return true;
    }
    if (ch === '>') {
        tokenizer.setState(data_state);
        tokenizer._emitCurrentToken();
        return true;
    }

    // Every remaining input is a parse error with the same code.
    tokenizer._parseError("unexpected-char-in-doctype");
    if (ch === '"') {
        tokenizer._currentToken.systemId = '';
        tokenizer.setState(doctype_system_identifier_double_quoted_state);
    } else if (ch === "'") {
        tokenizer._currentToken.systemId = '';
        tokenizer.setState(doctype_system_identifier_single_quoted_state);
    } else {
        tokenizer._currentToken.forceQuirks = true;
        tokenizer.setState(bogus_doctype_state);
    }
    return true;
}
/**
 * Tokenizer state handler: "after DOCTYPE system keyword".
 *
 * Whitespace is consumed; any other character is pushed back (a quote is
 * additionally reported as a parse error). In all of these cases the
 * before-DOCTYPE-system-identifier state follows. EOF is reported, flags
 * force-quirks, emits the token and hands EOF back to the data state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function after_doctype_system_keyword_state(buffer) {
    var ch = buffer.char();
    if (ch === InputStream.EOF) {
        tokenizer._parseError("eof-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer._emitCurrentToken();
        buffer.unget(ch);
        tokenizer.setState(data_state);
        return true;
    }
    if (!isWhitespace(ch)) {
        if (ch === "'" || ch === '"')
            tokenizer._parseError("unexpected-char-in-doctype");
        buffer.unget(ch);
    }
    tokenizer.setState(before_doctype_system_identifier_state);
    return true;
}
/**
 * Tokenizer state handler: "DOCTYPE system identifier (double-quoted)".
 *
 * Appends characters to the token's systemId until the closing '"'. A ">"
 * or EOF inside the identifier is reported, flags force-quirks and emits
 * the token before returning to the data state.
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function doctype_system_identifier_double_quoted_state(buffer) {
    var ch = buffer.char();
    switch (ch) {
    case InputStream.EOF:
        tokenizer._parseError("eof-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer._emitCurrentToken();
        buffer.unget(ch);
        tokenizer.setState(data_state);
        break;
    case '"':
        tokenizer.setState(after_doctype_system_identifier_state);
        break;
    case '>':
        tokenizer._parseError("unexpected-end-of-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer._emitCurrentToken();
        tokenizer.setState(data_state);
        break;
    default:
        tokenizer._currentToken.systemId += ch;
    }
    return true;
}

/**
 * Tokenizer state handler: "DOCTYPE system identifier (single-quoted)".
 *
 * Same as the double-quoted variant, except that the identifier is
 * terminated by "'".
 *
 * @param {Object} buffer input buffer exposing char() and unget()
 * @return {boolean} always true
 */
function doctype_system_identifier_single_quoted_state(buffer) {
    var ch = buffer.char();
    switch (ch) {
    case InputStream.EOF:
        tokenizer._parseError("eof-in-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer._emitCurrentToken();
        buffer.unget(ch);
        tokenizer.setState(data_state);
        break;
    case "'":
        tokenizer.setState(after_doctype_system_identifier_state);
        break;
    case '>':
        tokenizer._parseError("unexpected-end-of-doctype");
        tokenizer._currentToken.forceQuirks = true;
        tokenizer._emitCurrentToken();
        tokenizer.setState(data_state);
        break;
    default:
        tokenizer._currentToken.systemId += ch;
    }
    return true;
}
/**
 * Tells whether `ch` is one of the five whitespace characters recognised
 * here: space, line feed, tab, carriage return or form feed.
 *
 * @param {*} ch value to test (compared with strict equality)
 * @return {boolean}
 */
function isWhitespace(ch) {
    switch (ch) {
    case " ":
    case "\n":
    case "\t":
    case "\r":
    case "\f":
        return true;
    default:
        return false;
    }
}

/**
 * Like isWhitespace(), but also accepts U+FFFD REPLACEMENT CHARACTER.
 *
 * @param {*} ch value to test
 * @return {boolean}
 */
function isWhitespaceOrReplacementCharacter(ch) {
    if (isWhitespace(ch))
        return true;
    return ch === '\uFFFD';
}

/**
 * Tells whether every indexed element of `characters` is whitespace.
 * An empty input yields true.
 *
 * @param {string|Array} characters indexable sequence with a length
 * @return {boolean}
 */
function isAllWhitespace(characters) {
    var index = 0;
    while (index < characters.length) {
        if (!isWhitespace(characters[index]))
            return false;
        index++;
    }
    return true;
}

/**
 * Tells whether every indexed element of `characters` is whitespace or
 * U+FFFD. An empty input yields true.
 *
 * @param {string|Array} characters indexable sequence with a length
 * @return {boolean}
 */
function isAllWhitespaceOrReplacementCharacters(characters) {
    var index = 0;
    while (index < characters.length) {
        if (!isWhitespaceOrReplacementCharacter(characters[index]))
            return false;
        index++;
    }
    return true;
}
return attribute; + } + } + return null; +} + +function CharacterBuffer(characters) { + this.characters = characters; + this.current = 0; + this.end = this.characters.length; +} + +CharacterBuffer.prototype.skipAtMostOneLeadingNewline = function() { + if (this.characters[this.current] === '\n') + this.current++; +}; + +CharacterBuffer.prototype.skipLeadingWhitespace = function() { + while (isWhitespace(this.characters[this.current])) { + if (++this.current == this.end) + return; + } +}; + +CharacterBuffer.prototype.skipLeadingNonWhitespace = function() { + while (!isWhitespace(this.characters[this.current])) { + if (++this.current == this.end) + return; + } +}; + +CharacterBuffer.prototype.takeRemaining = function() { + return this.characters.substring(this.current); +}; + +CharacterBuffer.prototype.takeLeadingWhitespace = function() { + var start = this.current; + this.skipLeadingWhitespace(); + if (start === this.current) + return ""; + return this.characters.substring(start, this.current - start); +}; + +Object.defineProperty(CharacterBuffer.prototype, 'length', { + get: function(){ + return this.end - this.current; + } +}); + +/** + * + * @constructor + */ +function TreeBuilder() { + this.tokenizer = null; + this.errorHandler = null; + this.scriptingEnabled = false; + this.document = null; + this.head = null; + this.form = null; + this.openElements = new ElementStack(); + this.activeFormattingElements = []; + this.insertionMode = null; + this.insertionModeName = ""; + this.originalInsertionMode = ""; + this.inQuirksMode = false; // TODO quirks mode + this.compatMode = "no quirks"; + this.framesetOk = true; + this.redirectAttachToFosterParent = false; + this.selfClosingFlagAcknowledged = false; + this.context = ""; + this.firstStartTag = false; + this.pendingTableCharacters = []; + this.shouldSkipLeadingNewline = false; + + var tree = this; + var modes = this.insertionModes = {}; + modes.base = { + end_tag_handlers: {"-default": 'endTagOther'}, + 
start_tag_handlers: {"-default": 'startTagOther'}, + processEOF: function() { + tree.generateImpliedEndTags(); + if (tree.openElements.length > 2) { + tree.parseError('expected-closing-tag-but-got-eof'); + } else if (tree.openElements.length == 2 && + tree.openElements.item(1).localName != 'body') { + // This happens for framesets or something? + tree.parseError('expected-closing-tag-but-got-eof'); + } else if (tree.context && tree.openElements.length > 1) { + // XXX This is not what the specification says. Not sure what to do here. + //tree.parseError('eof-in-innerhtml'); + } + }, + processComment: function(data) { + // For most phases the following is forceQuirks. Where it's not it will be + // overridden. + tree.insertComment(data, tree.currentStackItem().node); + }, + processDoctype: function(name, publicId, systemId, forceQuirks) { + tree.parseError('unexpected-doctype'); + }, + processStartTag: function(name, attributes, selfClosing) { + if (this[this.start_tag_handlers[name]]) { + this[this.start_tag_handlers[name]](name, attributes, selfClosing); + } else if (this[this.start_tag_handlers["-default"]]) { + this[this.start_tag_handlers["-default"]](name, attributes, selfClosing); + } else { + throw(new Error("No handler found for "+name)); + } + }, + processEndTag: function(name) { + if (this[this.end_tag_handlers[name]]) { + this[this.end_tag_handlers[name]](name); + } else if (this[this.end_tag_handlers["-default"]]) { + this[this.end_tag_handlers["-default"]](name); + } else { + throw(new Error("No handler found for "+name)); + } + }, + startTagHtml: function(name, attributes) { + if (!tree.firstStartTag && name == 'html') { + tree.parseError('non-html-root'); + } + tree.addAttributesToElement(tree.openElements.rootNode, attributes); + tree.firstStartTag = false; + } + }; + + modes.initial = Object.create(modes.base); + + modes.initial.processEOF = function() { + tree.parseError("expected-doctype-but-got-eof"); + this.anythingElse(); + 
tree.insertionMode.processEOF(); + }; + + modes.initial.processComment = function(data) { + tree.insertComment(data, tree.document); + }; + + modes.initial.processDoctype = function(name, publicId, systemId, forceQuirks) { + tree.insertDoctype(name || '', publicId || '', systemId || ''); + + if (forceQuirks || name != 'html' || (publicId != null && ([ + "+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//ietf//dtd html//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + 
"-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//", + "html" + ].some(publicIdStartsWith) + || [ + "-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html" + ].indexOf(publicId.toLowerCase()) > -1 + || (systemId == null && [ + "-//w3c//dtd html 4.01 transitional//", + "-//w3c//dtd html 4.01 frameset//" + ].some(publicIdStartsWith))) + ) + || (systemId != null && (systemId.toLowerCase() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) + ) { + tree.compatMode = "quirks"; + tree.parseError("quirky-doctype"); + } else if (publicId != null && ([ + "-//w3c//dtd xhtml 1.0 transitional//", + "-//w3c//dtd xhtml 1.0 frameset//" + ].some(publicIdStartsWith) + || (systemId != null && [ + "-//w3c//dtd html 4.01 transitional//", + "-//w3c//dtd html 4.01 frameset//" + ].indexOf(publicId.toLowerCase()) > -1)) + ) { + tree.compatMode = "limited quirks"; + tree.parseError("almost-standards-doctype"); + } else { + if ((publicId == "-//W3C//DTD HTML 4.0//EN" && (systemId == null || systemId == "http://www.w3.org/TR/REC-html40/strict.dtd")) + || (publicId == "-//W3C//DTD HTML 4.01//EN" && 
(systemId == null || systemId == "http://www.w3.org/TR/html4/strict.dtd")) + || (publicId == "-//W3C//DTD XHTML 1.0 Strict//EN" && (systemId == "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")) + || (publicId == "-//W3C//DTD XHTML 1.1//EN" && (systemId == "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd")) + ) { + // warning + //tree.warn("obsolete-doctype"); + } else if (!((systemId == null || systemId == "about:legacy-compat") && publicId == null)) { + tree.parseError("unknown-doctype"); + } + } + tree.setInsertionMode('beforeHTML'); + function publicIdStartsWith(string) { + return publicId.toLowerCase().indexOf(string) === 0; + } + }; + + modes.initial.processCharacters = function(buffer) { + buffer.skipLeadingWhitespace(); + if (!buffer.length) + return; + tree.parseError('expected-doctype-but-got-chars'); + this.anythingElse(); + tree.insertionMode.processCharacters(buffer); + }; + + modes.initial.processStartTag = function(name, attributes, selfClosing) { + tree.parseError('expected-doctype-but-got-start-tag', {name: name}); + this.anythingElse(); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.initial.processEndTag = function(name) { + tree.parseError('expected-doctype-but-got-end-tag', {name: name}); + this.anythingElse(); + tree.insertionMode.processEndTag(name); + }; + + modes.initial.anythingElse = function() { + tree.compatMode = 'quirks'; + tree.setInsertionMode('beforeHTML'); + }; + + modes.beforeHTML = Object.create(modes.base); + + modes.beforeHTML.start_tag_handlers = { + html: 'startTagHtml', + '-default': 'startTagOther' + }; + + modes.beforeHTML.processEOF = function() { + this.anythingElse(); + tree.insertionMode.processEOF(); + }; + + modes.beforeHTML.processComment = function(data) { + tree.insertComment(data, tree.document); + }; + + modes.beforeHTML.processCharacters = function(buffer) { + buffer.skipLeadingWhitespace(); + if (!buffer.length) + return; + this.anythingElse(); + 
tree.insertionMode.processCharacters(buffer); + }; + + modes.beforeHTML.startTagHtml = function(name, attributes, selfClosing) { + tree.firstStartTag = true; + tree.insertHtmlElement(attributes); + tree.setInsertionMode('beforeHead'); + }; + + modes.beforeHTML.startTagOther = function(name, attributes, selfClosing) { + this.anythingElse(); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.beforeHTML.processEndTag = function(name) { + this.anythingElse(); + tree.insertionMode.processEndTag(name); + }; + + modes.beforeHTML.anythingElse = function() { + tree.insertHtmlElement(); + tree.setInsertionMode('beforeHead'); + }; + + modes.afterAfterBody = Object.create(modes.base); + + modes.afterAfterBody.start_tag_handlers = { + html: 'startTagHtml', + '-default': 'startTagOther' + }; + + modes.afterAfterBody.processComment = function(data) { + tree.insertComment(data, tree.document); + }; + + modes.afterAfterBody.processDoctype = function(data) { + modes.inBody.processDoctype(data); + }; + + modes.afterAfterBody.startTagHtml = function(data, attributes) { + modes.inBody.startTagHtml(data, attributes); + }; + + modes.afterAfterBody.startTagOther = function(name, attributes, selfClosing) { + tree.parseError('unexpected-start-tag', {name: name}); + tree.setInsertionMode('inBody'); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.afterAfterBody.endTagOther = function(name) { + tree.parseError('unexpected-end-tag', {name: name}); + tree.setInsertionMode('inBody'); + tree.insertionMode.processEndTag(name); + }; + + modes.afterAfterBody.processCharacters = function(data) { + if (!isAllWhitespace(data.characters)) { + tree.parseError('unexpected-char-after-body'); + tree.setInsertionMode('inBody'); + return tree.insertionMode.processCharacters(data); + } + modes.inBody.processCharacters(data); + }; + + modes.afterBody = Object.create(modes.base); + + modes.afterBody.end_tag_handlers = { + html: 'endTagHtml', + 
'-default': 'endTagOther' + }; + + modes.afterBody.processComment = function(data) { + // This is needed because data is to be appended to the html element here + // and not to whatever is currently open. + tree.insertComment(data, tree.openElements.rootNode); + }; + + modes.afterBody.processCharacters = function(data) { + if (!isAllWhitespace(data.characters)) { + tree.parseError('unexpected-char-after-body'); + tree.setInsertionMode('inBody'); + return tree.insertionMode.processCharacters(data); + } + modes.inBody.processCharacters(data); + }; + + modes.afterBody.processStartTag = function(name, attributes, selfClosing) { + tree.parseError('unexpected-start-tag-after-body', {name: name}); + tree.setInsertionMode('inBody'); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.afterBody.endTagHtml = function(name) { + if (tree.context) { + tree.parseError('end-html-in-innerhtml'); + } else { + // XXX This may need to be done, not sure + // Don't set last_phase to the current phase but to the inBody phase + // instead. No need for extra parseErrors if there's something after + // . 
+ // Try XX for instance + tree.setInsertionMode('afterAfterBody'); + } + }; + + modes.afterBody.endTagOther = function(name) { + tree.parseError('unexpected-end-tag-after-body', {name: name}); + tree.setInsertionMode('inBody'); + tree.insertionMode.processEndTag(name); + }; + + modes.afterFrameset = Object.create(modes.base); + + modes.afterFrameset.start_tag_handlers = { + html: 'startTagHtml', + noframes: 'startTagNoframes', + '-default': 'startTagOther' + }; + + modes.afterFrameset.end_tag_handlers = { + html: 'endTagHtml', + '-default': 'endTagOther' + }; + + modes.afterFrameset.processCharacters = function(buffer) { + var characters = buffer.takeRemaining(); + var whitespace = ""; + for (var i = 0; i < characters.length; i++) { + var ch = characters[i]; + if (isWhitespace(ch)) + whitespace += ch; + } + if (whitespace) { + tree.insertText(whitespace); + } + if (whitespace.length < characters.length) + tree.parseError('expected-eof-but-got-char'); + }; + + modes.afterFrameset.startTagNoframes = function(name, attributes) { + modes.inHead.processStartTag(name, attributes); + }; + + modes.afterFrameset.startTagOther = function(name, attributes) { + tree.parseError("unexpected-start-tag-after-frameset", {name: name}); + }; + + modes.afterFrameset.endTagHtml = function(name) { + tree.setInsertionMode('afterAfterFrameset'); + }; + + modes.afterFrameset.endTagOther = function(name) { + tree.parseError("unexpected-end-tag-after-frameset", {name: name}); + }; + + modes.beforeHead = Object.create(modes.base); + + modes.beforeHead.start_tag_handlers = { + html: 'startTagHtml', + head: 'startTagHead', + '-default': 'startTagOther' + }; + + modes.beforeHead.end_tag_handlers = { + html: 'endTagImplyHead', + head: 'endTagImplyHead', + body: 'endTagImplyHead', + br: 'endTagImplyHead', + '-default': 'endTagOther' + }; + + modes.beforeHead.processEOF = function() { + this.startTagHead('head', []); + tree.insertionMode.processEOF(); + }; + + modes.beforeHead.processCharacters = 
function(buffer) { + buffer.skipLeadingWhitespace(); + if (!buffer.length) + return; + this.startTagHead('head', []); + tree.insertionMode.processCharacters(buffer); + }; + + modes.beforeHead.startTagHead = function(name, attributes) { + tree.insertHeadElement(attributes); + tree.setInsertionMode('inHead'); + }; + + modes.beforeHead.startTagOther = function(name, attributes, selfClosing) { + this.startTagHead('head', []); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.beforeHead.endTagImplyHead = function(name) { + this.startTagHead('head', []); + tree.insertionMode.processEndTag(name); + }; + + modes.beforeHead.endTagOther = function(name) { + tree.parseError('end-tag-after-implied-root', {name: name}); + }; + + modes.inHead = Object.create(modes.base); + + modes.inHead.start_tag_handlers = { + html: 'startTagHtml', + head: 'startTagHead', + title: 'startTagTitle', + script: 'startTagScript', + style: 'startTagNoFramesStyle', + noscript: 'startTagNoScript', + noframes: 'startTagNoFramesStyle', + base: 'startTagBaseLinkCommand', + basefont: 'startTagBaseLinkCommand', + bgsound: 'startTagBaseLinkCommand', + command: 'startTagBaseLinkCommand', //FIXME drop command tag? 
+ link: 'startTagBaseLinkCommand', + meta: 'startTagMeta', + "-default": 'startTagOther' + }; + + modes.inHead.end_tag_handlers = { + head: 'endTagHead', + html: 'endTagHtmlBodyBr', + body: 'endTagHtmlBodyBr', + br: 'endTagHtmlBodyBr', + "-default": 'endTagOther' + }; + + modes.inHead.processEOF = function() { + var name = tree.currentStackItem().localName; + if (['title', 'style', 'script'].indexOf(name) != -1) { + tree.parseError("expected-named-closing-tag-but-got-eof", {name: name}); + tree.popElement(); + } + + this.anythingElse(); + + tree.insertionMode.processEOF(); + }; + + modes.inHead.processCharacters = function(buffer) { + var leadingWhitespace = buffer.takeLeadingWhitespace(); + if (leadingWhitespace) + tree.insertText(leadingWhitespace); + if (!buffer.length) + return; + this.anythingElse(); + tree.insertionMode.processCharacters(buffer); + }; + + modes.inHead.startTagHtml = function(name, attributes) { + modes.inBody.processStartTag(name, attributes); + }; + + modes.inHead.startTagHead = function(name, attributes) { + tree.parseError('two-heads-are-not-better-than-one'); + }; + + modes.inHead.startTagTitle = function(name, attributes) { + tree.processGenericRCDATAStartTag(name, attributes); + }; + + modes.inHead.startTagNoScript = function(name, attributes) { + if (tree.scriptingEnabled) + return tree.processGenericRawTextStartTag(name, attributes); + tree.insertElement(name, attributes); + tree.setInsertionMode('inHeadNoscript'); + }; + + modes.inHead.startTagNoFramesStyle = function(name, attributes) { + // XXX Need to decide whether to implement the scripting disabled case + tree.processGenericRawTextStartTag(name, attributes); + }; + + modes.inHead.startTagScript = function(name, attributes) { + tree.insertElement(name, attributes); + tree.tokenizer.setState(Tokenizer.SCRIPT_DATA); + tree.originalInsertionMode = tree.insertionModeName; + tree.setInsertionMode('text'); + }; + + modes.inHead.startTagBaseLinkCommand = function(name, attributes) { + 
tree.insertSelfClosingElement(name, attributes); + }; + + modes.inHead.startTagMeta = function(name, attributes) { + tree.insertSelfClosingElement(name, attributes); + // @todo process charset attributes + }; + + modes.inHead.startTagOther = function(name, attributes, selfClosing) { + this.anythingElse(); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.inHead.endTagHead = function(name) { + if (tree.openElements.item(tree.openElements.length - 1).localName == 'head') { + tree.openElements.pop(); + } else { + tree.parseError('unexpected-end-tag', {name: 'head'}); + } + tree.setInsertionMode('afterHead'); + }; + + modes.inHead.endTagHtmlBodyBr = function(name) { + this.anythingElse(); + tree.insertionMode.processEndTag(name); + }; + + modes.inHead.endTagOther = function(name) { + tree.parseError('unexpected-end-tag', {name: name}); + }; + + modes.inHead.anythingElse = function() { + this.endTagHead('head'); + }; + + modes.afterHead = Object.create(modes.base); + + modes.afterHead.start_tag_handlers = { + html: 'startTagHtml', + head: 'startTagHead', + body: 'startTagBody', + frameset: 'startTagFrameset', + base: 'startTagFromHead', + link: 'startTagFromHead', + meta: 'startTagFromHead', + script: 'startTagFromHead', + // XXX noframes: 'startTagFromHead' ? 
+ style: 'startTagFromHead', + title: 'startTagFromHead', + "-default": 'startTagOther' + }; + + modes.afterHead.end_tag_handlers = { + body: 'endTagBodyHtmlBr', + html: 'endTagBodyHtmlBr', + br: 'endTagBodyHtmlBr', + "-default": 'endTagOther' + }; + + modes.afterHead.processEOF = function() { + this.anythingElse(); + tree.insertionMode.processEOF(); + }; + + modes.afterHead.processCharacters = function(buffer) { + var leadingWhitespace = buffer.takeLeadingWhitespace(); + if (leadingWhitespace) + tree.insertText(leadingWhitespace); + if (!buffer.length) + return; + this.anythingElse(); + tree.insertionMode.processCharacters(buffer); + }; + + modes.afterHead.startTagHtml = function(name, attributes) { + modes.inBody.processStartTag(name, attributes); + }; + + modes.afterHead.startTagBody = function(name, attributes) { + tree.framesetOk = false; + tree.insertBodyElement(attributes); + tree.setInsertionMode('inBody'); + }; + + modes.afterHead.startTagFrameset = function(name, attributes) { + tree.insertElement(name, attributes); + tree.setInsertionMode('inFrameset'); + }; + + modes.afterHead.startTagFromHead = function(name, attributes, selfClosing) { + tree.parseError("unexpected-start-tag-out-of-my-head", {name: name}); + // FIXME head pointer + tree.openElements.push(tree.head); + modes.inHead.processStartTag(name, attributes, selfClosing); + tree.openElements.remove(tree.head); + }; + + modes.afterHead.startTagHead = function(name, attributes, selfClosing) { + tree.parseError('unexpected-start-tag', {name: name}); + }; + + modes.afterHead.startTagOther = function(name, attributes, selfClosing) { + this.anythingElse(); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.afterHead.endTagBodyHtmlBr = function(name) { + this.anythingElse(); + tree.insertionMode.processEndTag(name); + }; + + modes.afterHead.endTagOther = function(name) { + tree.parseError('unexpected-end-tag', {name: name}); + }; + + modes.afterHead.anythingElse = 
function() { + tree.insertBodyElement([]); + tree.setInsertionMode('inBody'); + tree.framesetOk = true; + } + + modes.inBody = Object.create(modes.base); + + modes.inBody.start_tag_handlers = { + html: 'startTagHtml', + head: 'startTagMisplaced', + base: 'startTagProcessInHead', + basefont: 'startTagProcessInHead', + bgsound: 'startTagProcessInHead', + command: 'startTagProcessInHead', + link: 'startTagProcessInHead', + meta: 'startTagProcessInHead', + noframes: 'startTagProcessInHead', + script: 'startTagProcessInHead', + style: 'startTagProcessInHead', + title: 'startTagProcessInHead', + body: 'startTagBody', + form: 'startTagForm', + plaintext: 'startTagPlaintext', + a: 'startTagA', + button: 'startTagButton', + xmp: 'startTagXmp', + table: 'startTagTable', + hr: 'startTagHr', + image: 'startTagImage', + input: 'startTagInput', + textarea: 'startTagTextarea', + select: 'startTagSelect', + isindex: 'startTagIsindex', + applet: 'startTagAppletMarqueeObject', + marquee: 'startTagAppletMarqueeObject', + object: 'startTagAppletMarqueeObject', + li: 'startTagListItem', + dd: 'startTagListItem', + dt: 'startTagListItem', + address: 'startTagCloseP', + article: 'startTagCloseP', + aside: 'startTagCloseP', + blockquote: 'startTagCloseP', + center: 'startTagCloseP', + details: 'startTagCloseP', + dir: 'startTagCloseP', + div: 'startTagCloseP', + dl: 'startTagCloseP', + fieldset: 'startTagCloseP', + figcaption: 'startTagCloseP', + figure: 'startTagCloseP', + footer: 'startTagCloseP', + header: 'startTagCloseP', + hgroup: 'startTagCloseP', + main: 'startTagCloseP', + menu: 'startTagCloseP', + nav: 'startTagCloseP', + ol: 'startTagCloseP', + p: 'startTagCloseP', + section: 'startTagCloseP', + summary: 'startTagCloseP', + ul: 'startTagCloseP', + listing: 'startTagPreListing', + pre: 'startTagPreListing', + b: 'startTagFormatting', + big: 'startTagFormatting', + code: 'startTagFormatting', + em: 'startTagFormatting', + font: 'startTagFormatting', + i: 'startTagFormatting', + 
s: 'startTagFormatting', + small: 'startTagFormatting', + strike: 'startTagFormatting', + strong: 'startTagFormatting', + tt: 'startTagFormatting', + u: 'startTagFormatting', + nobr: 'startTagNobr', + area: 'startTagVoidFormatting', + br: 'startTagVoidFormatting', + embed: 'startTagVoidFormatting', + img: 'startTagVoidFormatting', + keygen: 'startTagVoidFormatting', + wbr: 'startTagVoidFormatting', + param: 'startTagParamSourceTrack', + source: 'startTagParamSourceTrack', + track: 'startTagParamSourceTrack', + iframe: 'startTagIFrame', + noembed: 'startTagRawText', + noscript: 'startTagRawText', + h1: 'startTagHeading', + h2: 'startTagHeading', + h3: 'startTagHeading', + h4: 'startTagHeading', + h5: 'startTagHeading', + h6: 'startTagHeading', + caption: 'startTagMisplaced', + col: 'startTagMisplaced', + colgroup: 'startTagMisplaced', + frame: 'startTagMisplaced', + frameset: 'startTagFrameset', + tbody: 'startTagMisplaced', + td: 'startTagMisplaced', + tfoot: 'startTagMisplaced', + th: 'startTagMisplaced', + thead: 'startTagMisplaced', + tr: 'startTagMisplaced', + option: 'startTagOptionOptgroup', + optgroup: 'startTagOptionOptgroup', + math: 'startTagMath', + svg: 'startTagSVG', + rt: 'startTagRpRt', + rp: 'startTagRpRt', + "-default": 'startTagOther' + }; + + modes.inBody.end_tag_handlers = { + p: 'endTagP', + body: 'endTagBody', + html: 'endTagHtml', + address: 'endTagBlock', + article: 'endTagBlock', + aside: 'endTagBlock', + blockquote: 'endTagBlock', + button: 'endTagBlock', + center: 'endTagBlock', + details: 'endTagBlock', + dir: 'endTagBlock', + div: 'endTagBlock', + dl: 'endTagBlock', + fieldset: 'endTagBlock', + figcaption: 'endTagBlock', + figure: 'endTagBlock', + footer: 'endTagBlock', + header: 'endTagBlock', + hgroup: 'endTagBlock', + listing: 'endTagBlock', + main: 'endTagBlock', + menu: 'endTagBlock', + nav: 'endTagBlock', + ol: 'endTagBlock', + pre: 'endTagBlock', + section: 'endTagBlock', + summary: 'endTagBlock', + ul: 'endTagBlock', + form: 
'endTagForm', + applet: 'endTagAppletMarqueeObject', + marquee: 'endTagAppletMarqueeObject', + object: 'endTagAppletMarqueeObject', + dd: 'endTagListItem', + dt: 'endTagListItem', + li: 'endTagListItem', + h1: 'endTagHeading', + h2: 'endTagHeading', + h3: 'endTagHeading', + h4: 'endTagHeading', + h5: 'endTagHeading', + h6: 'endTagHeading', + a: 'endTagFormatting', + b: 'endTagFormatting', + big: 'endTagFormatting', + code: 'endTagFormatting', + em: 'endTagFormatting', + font: 'endTagFormatting', + i: 'endTagFormatting', + nobr: 'endTagFormatting', + s: 'endTagFormatting', + small: 'endTagFormatting', + strike: 'endTagFormatting', + strong: 'endTagFormatting', + tt: 'endTagFormatting', + u: 'endTagFormatting', + br: 'endTagBr', + "-default": 'endTagOther' + }; + + modes.inBody.processCharacters = function(buffer) { + if (tree.shouldSkipLeadingNewline) { + tree.shouldSkipLeadingNewline = false; + buffer.skipAtMostOneLeadingNewline(); + } + tree.reconstructActiveFormattingElements(); + var characters = buffer.takeRemaining(); + characters = characters.replace(/\u0000/g, function(match, index){ + // @todo position + tree.parseError("invalid-codepoint"); + return ''; + }); + if (!characters) + return; + tree.insertText(characters); + if (tree.framesetOk && !isAllWhitespaceOrReplacementCharacters(characters)) + tree.framesetOk = false; + }; + + modes.inBody.startTagProcessInHead = function(name, attributes) { + modes.inHead.processStartTag(name, attributes); + }; + + modes.inBody.startTagBody = function(name, attributes) { + tree.parseError('unexpected-start-tag', {name: 'body'}); + if (tree.openElements.length == 1 || + tree.openElements.item(1).localName != 'body') { + assert.ok(tree.context); + } else { + tree.framesetOk = false; + tree.addAttributesToElement(tree.openElements.bodyElement, attributes); + } + }; + + modes.inBody.startTagFrameset = function(name, attributes) { + tree.parseError('unexpected-start-tag', {name: 'frameset'}); + if (tree.openElements.length 
== 1 || + tree.openElements.item(1).localName != 'body') { + assert.ok(tree.context); + } else if (tree.framesetOk) { + tree.detachFromParent(tree.openElements.bodyElement); + while (tree.openElements.length > 1) + tree.openElements.pop(); + tree.insertElement(name, attributes); + tree.setInsertionMode('inFrameset'); + } + }; + + modes.inBody.startTagCloseP = function(name, attributes) { + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + tree.insertElement(name, attributes); + }; + + modes.inBody.startTagPreListing = function(name, attributes) { + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + tree.insertElement(name, attributes); + tree.framesetOk = false; + tree.shouldSkipLeadingNewline = true; + }; + + modes.inBody.startTagForm = function(name, attributes) { + if (tree.form) { + tree.parseError('unexpected-start-tag', {name: name}); + } else { + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + tree.insertElement(name, attributes); + tree.form = tree.currentStackItem(); + } + }; + + modes.inBody.startTagRpRt = function(name, attributes) { + if (tree.openElements.inScope('ruby')) { + tree.generateImpliedEndTags(); + if (tree.currentStackItem().localName != 'ruby') { + tree.parseError('unexpected-start-tag', {name: name}); + } + } + tree.insertElement(name, attributes); + }; + + modes.inBody.startTagListItem = function(name, attributes) { + /// @todo: Fix according to current spec. 
http://www.w3.org/TR/html5/tree-construction.html#parsing-main-inbody + var stopNames = {li: ['li'], dd: ['dd', 'dt'], dt: ['dd', 'dt']}; + var stopName = stopNames[name]; + + var els = tree.openElements; + for (var i = els.length - 1; i >= 0; i--) { + var node = els.item(i); + if (stopName.indexOf(node.localName) != -1) { + tree.insertionMode.processEndTag(node.localName); + break; + } + + // todo isScoping() + if (node.isSpecial() && node.localName !== 'p' && node.localName !== 'address' && node.localName !== 'div') + break; + } + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + + // Always insert an
  • element + tree.insertElement(name, attributes); + tree.framesetOk = false; + }; + + modes.inBody.startTagPlaintext = function(name, attributes) { + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + tree.insertElement(name, attributes); + tree.tokenizer.setState(Tokenizer.PLAINTEXT); + }; + + modes.inBody.startTagHeading = function(name, attributes) { + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + if (tree.currentStackItem().isNumberedHeader()) { + tree.parseError('unexpected-start-tag', {name: name}); + tree.popElement(); + } + tree.insertElement(name, attributes); + }; + + modes.inBody.startTagA = function(name, attributes) { + var activeA = tree.elementInActiveFormattingElements('a'); + if (activeA) { + tree.parseError("unexpected-start-tag-implies-end-tag", {startName: "a", endName: "a"}); + tree.adoptionAgencyEndTag('a'); + if (tree.openElements.contains(activeA)) + tree.openElements.remove(activeA); + tree.removeElementFromActiveFormattingElements(activeA); + } + tree.reconstructActiveFormattingElements(); + tree.insertFormattingElement(name, attributes); + }; + + modes.inBody.startTagFormatting = function(name, attributes) { + tree.reconstructActiveFormattingElements(); + tree.insertFormattingElement(name, attributes); + }; + + modes.inBody.startTagNobr = function(name, attributes) { + tree.reconstructActiveFormattingElements(); + if (tree.openElements.inScope('nobr')) { + tree.parseError("unexpected-start-tag-implies-end-tag", {startName: 'nobr', endName: 'nobr'}); + this.processEndTag('nobr'); + // XXX Need tests that trigger the following + tree.reconstructActiveFormattingElements(); + } + tree.insertFormattingElement(name, attributes); + }; + + modes.inBody.startTagButton = function(name, attributes) { + if (tree.openElements.inScope('button')) { + tree.parseError('unexpected-start-tag-implies-end-tag', {startName: 'button', endName: 'button'}); + this.processEndTag('button'); + 
tree.insertionMode.processStartTag(name, attributes); + } else { + tree.framesetOk = false; + tree.reconstructActiveFormattingElements(); + tree.insertElement(name, attributes); + } + }; + + modes.inBody.startTagAppletMarqueeObject = function(name, attributes) { + tree.reconstructActiveFormattingElements(); + tree.insertElement(name, attributes); + tree.activeFormattingElements.push(Marker); + tree.framesetOk = false; + }; + + modes.inBody.endTagAppletMarqueeObject = function(name) { + if (!tree.openElements.inScope(name)) { + tree.parseError("unexpected-end-tag", {name: name}); + } else { + tree.generateImpliedEndTags(); + if (tree.currentStackItem().localName != name) { + tree.parseError('end-tag-too-early', {name: name}); + } + tree.openElements.popUntilPopped(name); + tree.clearActiveFormattingElements(); + } + }; + + modes.inBody.startTagXmp = function(name, attributes) { + if (tree.openElements.inButtonScope('p')) + this.processEndTag('p'); + tree.reconstructActiveFormattingElements(); + tree.processGenericRawTextStartTag(name, attributes); + tree.framesetOk = false; + }; + + modes.inBody.startTagTable = function(name, attributes) { + if (tree.compatMode !== "quirks") + if (tree.openElements.inButtonScope('p')) + this.processEndTag('p'); + tree.insertElement(name, attributes); + tree.setInsertionMode('inTable'); + tree.framesetOk = false; + }; + + modes.inBody.startTagVoidFormatting = function(name, attributes) { + tree.reconstructActiveFormattingElements(); + tree.insertSelfClosingElement(name, attributes); + tree.framesetOk = false; + }; + + modes.inBody.startTagParamSourceTrack = function(name, attributes) { + tree.insertSelfClosingElement(name, attributes); + }; + + modes.inBody.startTagHr = function(name, attributes) { + if (tree.openElements.inButtonScope('p')) + this.endTagP('p'); + tree.insertSelfClosingElement(name, attributes); + tree.framesetOk = false; + }; + + modes.inBody.startTagImage = function(name, attributes) { + // No, really... 
+ tree.parseError('unexpected-start-tag-treated-as', {originalName: 'image', newName: 'img'}); + this.processStartTag('img', attributes); + }; + + /* "input" start tag: handled like any void element, except that framesetOk is restored to its previous value when the first attribute named 'type' has the value 'hidden' (value compared case-insensitively). */ + modes.inBody.startTagInput = function(name, attributes) { + var currentFramesetOk = tree.framesetOk; + this.startTagVoidFormatting(name, attributes); + for (var key in attributes) { + // input type=hidden doesn't change framesetOk + if (attributes[key].nodeName == 'type') { + if (attributes[key].nodeValue.toLowerCase() == 'hidden') + tree.framesetOk = currentFramesetOk; + break; + } + } + }; + + /* "isindex" start tag: always reported as deprecated and the self-closing flag is acknowledged. Nothing else happens when tree.form is already set. Otherwise the tag is expanded into a synthetic form / hr / label + prompt text + input / hr sequence: 'action' goes to the form, 'prompt' replaces the default label text, 'name' is dropped, every other attribute is copied to the input, and the input always receives name=isindex. */ + modes.inBody.startTagIsindex = function(name, attributes) { + tree.parseError('deprecated-tag', {name: 'isindex'}); + tree.selfClosingFlagAcknowledged = true; + if (tree.form) + return; + var formAttributes = []; + var inputAttributes = []; + var prompt = "This is a searchable index. Enter search keywords: "; + for (var key in attributes) { + switch (attributes[key].nodeName) { + case 'action': + formAttributes.push({nodeName: 'action', + nodeValue: attributes[key].nodeValue}); + break; + case 'prompt': + prompt = attributes[key].nodeValue; + break; + case 'name': + break; + default: + inputAttributes.push({nodeName: attributes[key].nodeName, + nodeValue: attributes[key].nodeValue}); + } + } + inputAttributes.push({nodeName: 'name', nodeValue: 'isindex'}); + this.processStartTag('form', formAttributes); + this.processStartTag('hr'); + this.processStartTag('label'); + this.processCharacters(new CharacterBuffer(prompt)); + this.processStartTag('input', inputAttributes); + this.processEndTag('label'); + this.processStartTag('hr'); + this.processEndTag('form'); + }; + + modes.inBody.startTagTextarea = function(name, attributes) { + // XXX Form element pointer checking here as well...
+ tree.insertElement(name, attributes); + tree.tokenizer.setState(Tokenizer.RCDATA); + tree.originalInsertionMode = tree.insertionModeName; + tree.shouldSkipLeadingNewline = true; + tree.framesetOk = false; + tree.setInsertionMode('text'); + }; + + /* "iframe" start tag: clears framesetOk, then treated as a raw-text element. */ + modes.inBody.startTagIFrame = function(name, attributes) { + tree.framesetOk = false; + this.startTagRawText(name, attributes); + }; + + /* Raw-text start tag: thin wrapper around tree.processGenericRawTextStartTag. */ + modes.inBody.startTagRawText = function(name, attributes) { + tree.processGenericRawTextStartTag(name, attributes); + }; + + /* "select" start tag: inserts the element, clears framesetOk and switches to 'inSelectInTable' when the insertion mode at that moment is one of the table-related modes listed below, otherwise to 'inSelect'. */ + modes.inBody.startTagSelect = function(name, attributes) { + tree.reconstructActiveFormattingElements(); + tree.insertElement(name, attributes); + tree.framesetOk = false; + var insertionModeName = tree.insertionModeName; + if (insertionModeName == 'inTable' || + insertionModeName == 'inCaption' || + insertionModeName == 'inColumnGroup' || + insertionModeName == 'inTableBody' || + insertionModeName == 'inRow' || + insertionModeName == 'inCell') { + tree.setInsertionMode('inSelectInTable'); + } else { + tree.setInsertionMode('inSelect'); + } + }; + + /* Start tag that has no business in body content: only a parse error is recorded, nothing is inserted. */ + modes.inBody.startTagMisplaced = function(name, attributes) { + tree.parseError('unexpected-start-tag-ignored', {name: name}); + }; + + modes.inBody.endTagMisplaced = function(name) { + // This handles elements with end tags in other insertion modes.
+ tree.parseError("unexpected-end-tag", {name: name}); + }; + + /* "br" end tag: reported, then treated as if a br element with no attributes had been seen (inserted and popped immediately). */ + modes.inBody.endTagBr = function(name) { + tree.parseError("unexpected-end-tag-treated-as", {originalName: "br", newName: "br element"}); + tree.reconstructActiveFormattingElements(); + tree.insertElement(name, []); + tree.popElement(); + }; + + /* option/optgroup start tag: an open option that is the current stack item is popped first. */ + modes.inBody.startTagOptionOptgroup = function(name, attributes) { + if (tree.currentStackItem().localName == 'option') + tree.popElement(); + tree.reconstructActiveFormattingElements(); + tree.insertElement(name, attributes); + }; + + /* Default start-tag handler: reconstructs formatting elements and inserts an ordinary element. */ + modes.inBody.startTagOther = function(name, attributes) { + tree.reconstructActiveFormattingElements(); + tree.insertElement(name, attributes); + }; + + /* Default end-tag handler: walks the open-element stack from the top down to index 1 (index 0 is never examined). The first element with a matching local name closes everything above it, with a parse error if it was not the current item; hitting a special element first is a parse error and stops the search. */ + modes.inBody.endTagOther = function(name) { + var node; + for (var i = tree.openElements.length - 1; i > 0; i--) { + node = tree.openElements.item(i); + if (node.localName == name) { + tree.generateImpliedEndTags(name); + if (tree.currentStackItem().localName != name) + tree.parseError('unexpected-end-tag', {name: name}); + // todo optimize + tree.openElements.remove_openElements_until(function(x) {return x === node;}); + break; + } + if (node.isSpecial()) { + tree.parseError('unexpected-end-tag', {name: name}); + break; + } + } + }; + + /* "math" start tag: adjusts MathML and foreign attributes and inserts the element in the MathML namespace. */ + modes.inBody.startTagMath = function(name, attributes, selfClosing) { + tree.reconstructActiveFormattingElements(); + attributes = tree.adjustMathMLAttributes(attributes); + attributes = tree.adjustForeignAttributes(attributes); + tree.insertForeignElement(name, attributes, "http://www.w3.org/1998/Math/MathML", selfClosing); + // Need to get the parse error right for the case where the token + // has a namespace not equal to the xmlns attribute + }; + + /* "svg" start tag: adjusts SVG and foreign attributes and inserts the element in the SVG namespace. */ + modes.inBody.startTagSVG = function(name, attributes, selfClosing) { + tree.reconstructActiveFormattingElements(); + attributes = tree.adjustSVGAttributes(attributes); + attributes = tree.adjustForeignAttributes(attributes); + tree.insertForeignElement(name, attributes, 
"http://www.w3.org/2000/svg", selfClosing); + // Need to get the parse error right for the case where the token + // has a namespace not equal to the xmlns attribute + }; + + /* "p" end tag: with no p in button scope the tag is reported, a p start tag is synthesised through startTagCloseP and this handler is re-run; otherwise implied end tags are generated (passing 'p'), a mismatch with the current item is reported and the stack is popped until the named element is popped. */ + modes.inBody.endTagP = function(name) { + if (!tree.openElements.inButtonScope('p')) { + tree.parseError('unexpected-end-tag', {name: 'p'}); + this.startTagCloseP('p', []); + this.endTagP('p'); + } else { + tree.generateImpliedEndTags('p'); + if (tree.currentStackItem().localName != 'p') + tree.parseError('unexpected-end-tag', {name: 'p'}); + tree.openElements.popUntilPopped(name); + } + }; + + /* "body" end tag: ignored with a parse error when no body is in scope; otherwise reports a mismatch if the current item is not body and switches to 'afterBody'. No element is popped here. */ + modes.inBody.endTagBody = function(name) { + if (!tree.openElements.inScope('body')) { + tree.parseError('unexpected-end-tag', {name: name}); + return; + } + + /// @todo Emit parse error on end tags other than the ones listed in http://www.w3.org/TR/html5/tree-construction.html#parsing-main-inbody + // ['dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rp', 'rt', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html'] + if (tree.currentStackItem().localName != 'body') { + tree.parseError('expected-one-end-tag-but-got-another', { + expectedName: tree.currentStackItem().localName, + gotName: name + }); + } + tree.setInsertionMode('afterBody'); + }; + + /* "html" end tag: same checks as the body end tag, then the token is reprocessed by the new ('afterBody') insertion mode. */ + modes.inBody.endTagHtml = function(name) { + if (!tree.openElements.inScope('body')) { + tree.parseError('unexpected-end-tag', {name: name}); + return; + } + + /// @todo Emit parse error on end tags other than the ones listed in http://www.w3.org/TR/html5/tree-construction.html#parsing-main-inbody + // ['dd', 'dt', 'li', 'optgroup', 'option', 'p', 'rp', 'rt', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'body', 'html'] + if (tree.currentStackItem().localName != 'body') { + tree.parseError('expected-one-end-tag-but-got-another', { + expectedName: tree.currentStackItem().localName, + gotName: name + }); + } + tree.setInsertionMode('afterBody'); + tree.insertionMode.processEndTag(name); + }; + + modes.inBody.endTagBlock = function(name) { + if
(!tree.openElements.inScope(name)) { + tree.parseError('unexpected-end-tag', {name: name}); + } else { + tree.generateImpliedEndTags(); + if (tree.currentStackItem().localName != name) { + tree.parseError('end-tag-too-early', {name: name}); + } + tree.openElements.popUntilPopped(name); + } + }; + + /* "form" end tag: tree.form is read and unconditionally reset to null. If it was empty, or the tag name is not in scope, the tag is reported as unexpected; otherwise implied end tags are generated, a parse error is recorded when the current item is not that form node, and the node is removed from the open-element stack. */ + modes.inBody.endTagForm = function(name) { + var node = tree.form; + tree.form = null; + if (!node || !tree.openElements.inScope(name)) { + tree.parseError('unexpected-end-tag', {name: name}); + } else { + tree.generateImpliedEndTags(); + if (tree.currentStackItem() != node) { + tree.parseError('end-tag-too-early-ignored', {name: 'form'}); + } + tree.openElements.remove(node); + } + }; + + /* List-item end tag: same shape as the block end tag, but uses list-item scope and passes the tag name to generateImpliedEndTags. */ + modes.inBody.endTagListItem = function(name) { + if (!tree.openElements.inListItemScope(name)) { + tree.parseError('unexpected-end-tag', {name: name}); + } else { + tree.generateImpliedEndTags(name); + if (tree.currentStackItem().localName != name) + tree.parseError('end-tag-too-early', {name: name}); + tree.openElements.popUntilPopped(name); + } + }; + + /* Heading end tag: ignored with a parse error when no numbered header is in scope; otherwise reports a mismatch and removes open elements via remove_openElements_until with an isNumberedHeader predicate, so any heading level closes the open heading. NOTE(review): whether the matching element itself is removed depends on remove_openElements_until, which is defined elsewhere - confirm. */ + modes.inBody.endTagHeading = function(name) { + if (!tree.openElements.hasNumberedHeaderElementInScope()) { + tree.parseError('unexpected-end-tag', {name: name}); + return; + } + tree.generateImpliedEndTags(); + if (tree.currentStackItem().localName != name) + tree.parseError('end-tag-too-early', {name: name}); + + tree.openElements.remove_openElements_until(function(e) { + return e.isNumberedHeader(); + }); + }; + + /* Formatting end tag: runs the adoption agency; when that returns a falsy value the tag falls back to endTagOther. */ + modes.inBody.endTagFormatting = function(name, attributes) { + if (!tree.adoptionAgencyEndTag(name)) + this.endTagOther(name, attributes); + }; + + /* 'inCaption' insertion mode; inherits from modes.base. Handler tables map a tag name to the name of the method that processes it, with '-default' as the fallback entry. */ + modes.inCaption = Object.create(modes.base); + + modes.inCaption.start_tag_handlers = { + html: 'startTagHtml', + caption: 'startTagTableElement', + col: 'startTagTableElement', + colgroup: 'startTagTableElement', + tbody: 'startTagTableElement', + td: 'startTagTableElement', + tfoot: 'startTagTableElement', + thead: 'startTagTableElement', + tr: 'startTagTableElement', +
'-default': 'startTagOther' + }; + + modes.inCaption.end_tag_handlers = { + caption: 'endTagCaption', + table: 'endTagTable', + body: 'endTagIgnore', + col: 'endTagIgnore', + colgroup: 'endTagIgnore', + html: 'endTagIgnore', + tbody: 'endTagIgnore', + td: 'endTagIgnore', + tfood: 'endTagIgnore', + thead: 'endTagIgnore', + tr: 'endTagIgnore', + '-default': 'endTagOther' + }; + + modes.inCaption.processCharacters = function(data) { + modes.inBody.processCharacters(data); + }; + + modes.inCaption.startTagTableElement = function(name, attributes) { + tree.parseError('unexpected-end-tag', {name: name}); + var ignoreEndTag = !tree.openElements.inTableScope('caption'); + tree.insertionMode.processEndTag('caption'); + if (!ignoreEndTag) tree.insertionMode.processStartTag(name, attributes); + }; + + modes.inCaption.startTagOther = function(name, attributes, selfClosing) { + modes.inBody.processStartTag(name, attributes, selfClosing); + }; + + modes.inCaption.endTagCaption = function(name) { + if (!tree.openElements.inTableScope('caption')) { + // context case + assert.ok(tree.context); + tree.parseError('unexpected-end-tag', {name: name}); + } else { + // AT this code is quite similar to endTagTable in inTable + tree.generateImpliedEndTags(); + if (tree.currentStackItem().localName != 'caption') { + // @todo this is confusing for implied end tag + tree.parseError('expected-one-end-tag-but-got-another', { + gotName: "caption", + expectedName: tree.currentStackItem().localName + }); + } + tree.openElements.popUntilPopped('caption'); + tree.clearActiveFormattingElements(); + tree.setInsertionMode('inTable'); + } + }; + + modes.inCaption.endTagTable = function(name) { + tree.parseError("unexpected-end-table-in-caption"); + var ignoreEndTag = !tree.openElements.inTableScope('caption'); + tree.insertionMode.processEndTag('caption'); + if (!ignoreEndTag) tree.insertionMode.processEndTag(name); + }; + + modes.inCaption.endTagIgnore = function(name) { + 
tree.parseError('unexpected-end-tag', {name: name}); + }; + + /* Any other end tag inside a caption is processed with the inBody rules. */ + modes.inCaption.endTagOther = function(name) { + modes.inBody.processEndTag(name); + }; + + /* 'inCell' insertion mode; inherits from modes.base. */ + modes.inCell = Object.create(modes.base); + + modes.inCell.start_tag_handlers = { + html: 'startTagHtml', + caption: 'startTagTableOther', + col: 'startTagTableOther', + colgroup: 'startTagTableOther', + tbody: 'startTagTableOther', + td: 'startTagTableOther', + tfoot: 'startTagTableOther', + th: 'startTagTableOther', + thead: 'startTagTableOther', + tr: 'startTagTableOther', + '-default': 'startTagOther' + }; + + modes.inCell.end_tag_handlers = { + td: 'endTagTableCell', + th: 'endTagTableCell', + body: 'endTagIgnore', + caption: 'endTagIgnore', + col: 'endTagIgnore', + colgroup: 'endTagIgnore', + html: 'endTagIgnore', + table: 'endTagImply', + tbody: 'endTagImply', + tfoot: 'endTagImply', + thead: 'endTagImply', + tr: 'endTagImply', + '-default': 'endTagOther' + }; + + /* Character data inside a cell is processed with the inBody rules. */ + modes.inCell.processCharacters = function(data) { + modes.inBody.processCharacters(data); + }; + + /* Table-structure start tag inside a cell: when a td or th is in table scope the cell is closed and the tag is reprocessed by the resulting mode; otherwise it is only reported. */ + modes.inCell.startTagTableOther = function(name, attributes, selfClosing) { + if (tree.openElements.inTableScope('td') || tree.openElements.inTableScope('th')) { + this.closeCell(); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + } else { + // context case + tree.parseError('unexpected-start-tag', {name: name}); + } + }; + + /* Any other start tag is processed with the inBody rules. */ + modes.inCell.startTagOther = function(name, attributes, selfClosing) { + modes.inBody.processStartTag(name, attributes, selfClosing); + }; + + /* td/th end tag: ignored with a parse error when the cell is not in table scope. Otherwise implied end tags are generated; if the current item is not the cell a parse error is recorded and the stack is popped until the cell is popped, else the cell alone is popped. Finally the active formatting elements are cleared and the mode becomes 'inRow'. */ + modes.inCell.endTagTableCell = function(name) { + if (tree.openElements.inTableScope(name)) { + tree.generateImpliedEndTags(name); + if (tree.currentStackItem().localName != name.toLowerCase()) { + tree.parseError('unexpected-cell-end-tag', {name: name}); + tree.openElements.popUntilPopped(name); + } else { + tree.popElement(); + } + tree.clearActiveFormattingElements(); + tree.setInsertionMode('inRow'); + } else { + tree.parseError('unexpected-end-tag', {name:
name}); + } + }; + + /* End tag that is simply a parse error inside a cell. */ + modes.inCell.endTagIgnore = function(name) { + tree.parseError('unexpected-end-tag', {name: name}); + }; + + /* Table-structure end tag inside a cell: when the named element is in table scope the cell is closed and the end tag is reprocessed by the resulting mode; otherwise it is only reported. */ + modes.inCell.endTagImply = function(name) { + if (tree.openElements.inTableScope(name)) { + this.closeCell(); + tree.insertionMode.processEndTag(name); + } else { + // sometimes context case + tree.parseError('unexpected-end-tag', {name: name}); + } + }; + + /* Any other end tag is processed with the inBody rules. */ + modes.inCell.endTagOther = function(name) { + modes.inBody.processEndTag(name); + }; + + /* Closes the open cell by synthesising the matching end tag; td is checked before th, and nothing happens when neither is in table scope. */ + modes.inCell.closeCell = function() { + if (tree.openElements.inTableScope('td')) { + this.endTagTableCell('td'); + } else if (tree.openElements.inTableScope('th')) { + this.endTagTableCell('th'); + } + }; + + + /* 'inColumnGroup' insertion mode; inherits from modes.base. */ + modes.inColumnGroup = Object.create(modes.base); + + modes.inColumnGroup.start_tag_handlers = { + html: 'startTagHtml', + col: 'startTagCol', + '-default': 'startTagOther' + }; + + modes.inColumnGroup.end_tag_handlers = { + colgroup: 'endTagColgroup', + col: 'endTagCol', + '-default': 'endTagOther' + }; + + /* True when the current stack item is the html element, i.e. there is no colgroup to close. */ + modes.inColumnGroup.ignoreEndTagColgroup = function() { + return tree.currentStackItem().localName == 'html'; + }; + + /* Character data: leading whitespace is inserted as text; if anything remains, a colgroup end tag is implied and the remainder is reprocessed by the resulting mode unless that end tag was ignored. */ + modes.inColumnGroup.processCharacters = function(buffer) { + var leadingWhitespace = buffer.takeLeadingWhitespace(); + if (leadingWhitespace) + tree.insertText(leadingWhitespace); + if (!buffer.length) + return; + var ignoreEndTag = this.ignoreEndTagColgroup(); + this.endTagColgroup('colgroup'); + if (!ignoreEndTag) tree.insertionMode.processCharacters(buffer); + }; + + /* "col" start tag: inserted as a self-closing element. */ + modes.inColumnGroup.startTagCol = function(name, attributes) { + tree.insertSelfClosingElement(name, attributes); + }; + + /* Any other start tag implies a colgroup end tag and is then reprocessed, unless that end tag was ignored. */ + modes.inColumnGroup.startTagOther = function(name, attributes, selfClosing) { + var ignoreEndTag = this.ignoreEndTagColgroup(); + this.endTagColgroup('colgroup'); + if (!ignoreEndTag) tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + modes.inColumnGroup.endTagColgroup = function(name) { + if (this.ignoreEndTagColgroup()) { + // context case +
assert.ok(tree.context); + tree.parseError('unexpected-end-tag', {name: name}); + } else { + tree.popElement(); + tree.setInsertionMode('inTable'); + } + }; + + /* "col" end tag is always a parse error. */ + modes.inColumnGroup.endTagCol = function(name) { + tree.parseError("no-end-tag", {name: 'col'}); + }; + + /* Any other end tag implies a colgroup end tag and is then reprocessed, unless that end tag was ignored. */ + modes.inColumnGroup.endTagOther = function(name) { + var ignoreEndTag = this.ignoreEndTagColgroup(); + this.endTagColgroup('colgroup'); + if (!ignoreEndTag) tree.insertionMode.processEndTag(name) ; + }; + + /* 'inForeignContent' insertion mode; inherits from modes.base and overrides the process* entry points directly instead of using handler tables. */ + modes.inForeignContent = Object.create(modes.base); + + /* Start tag in foreign content. A tag from the HTML-only list below (or a font tag carrying a color, face or size attribute) is a parse error: foreign elements are popped until the current item is not foreign or is an HTML / MathML-text integration point, and the tag is reprocessed by the current insertion mode. Any other tag is inserted as a foreign element in the namespace of the current stack item, after the MathML or SVG name/attribute adjustments that apply to that namespace. */ + modes.inForeignContent.processStartTag = function(name, attributes, selfClosing) { + if (['b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd', 'div', 'dl', 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'menu', 'meta', 'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong', 'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var'].indexOf(name) != -1 + || (name == 'font' && attributes.some(function(attr){ return ['color', 'face', 'size'].indexOf(attr.nodeName) >= 0 }))) { + tree.parseError('unexpected-html-element-in-foreign-content', {name: name}); + while (tree.currentStackItem().isForeign() + && !tree.currentStackItem().isHtmlIntegrationPoint() + && !tree.currentStackItem().isMathMLTextIntegrationPoint()) { + tree.openElements.pop(); + } + tree.insertionMode.processStartTag(name, attributes, selfClosing); + return; + } + if (tree.currentStackItem().namespaceURI == "http://www.w3.org/1998/Math/MathML") { + attributes = tree.adjustMathMLAttributes(attributes); + } + if (tree.currentStackItem().namespaceURI == "http://www.w3.org/2000/svg") { + name = tree.adjustSVGTagNameCase(name); + attributes = tree.adjustSVGAttributes(attributes); + } + attributes = tree.adjustForeignAttributes(attributes); + tree.insertForeignElement(name, attributes, tree.currentStackItem().namespaceURI, selfClosing); + }; + + modes.inForeignContent.processEndTag = function(name) { + var node =
tree.currentStackItem(); + var index = tree.openElements.length - 1; + if (node.localName.toLowerCase() != name) + tree.parseError("unexpected-end-tag", {name: name}); + + while (true) { + if (index === 0) + break; + if (node.localName.toLowerCase() == name) { + while (tree.openElements.pop() != node); + break; + } + index -= 1; + node = tree.openElements.item(index); + if (node.isForeign()) { + continue; + } else { + tree.insertionMode.processEndTag(name); + break; + } + } + }; + + /* Character data in foreign content: every U+0000 is reported and replaced by U+FFFD; framesetOk is cleared unless the text consists only of whitespace or replacement characters; the text is then inserted. */ + modes.inForeignContent.processCharacters = function(buffer) { + var characters = buffer.takeRemaining(); + characters = characters.replace(/\u0000/g, function(match, index){ + // @todo position + tree.parseError('invalid-codepoint'); + return '\uFFFD'; + }); + if (tree.framesetOk && !isAllWhitespaceOrReplacementCharacters(characters)) + tree.framesetOk = false; + tree.insertText(characters); + }; + + /* 'inHeadNoscript' insertion mode; inherits from modes.base. */ + modes.inHeadNoscript = Object.create(modes.base); + + modes.inHeadNoscript.start_tag_handlers = { + html: 'startTagHtml', + basefont: 'startTagBasefontBgsoundLinkMetaNoframesStyle', + bgsound: 'startTagBasefontBgsoundLinkMetaNoframesStyle', + link: 'startTagBasefontBgsoundLinkMetaNoframesStyle', + meta: 'startTagBasefontBgsoundLinkMetaNoframesStyle', + noframes: 'startTagBasefontBgsoundLinkMetaNoframesStyle', + style: 'startTagBasefontBgsoundLinkMetaNoframesStyle', + head: 'startTagHeadNoscript', + noscript: 'startTagHeadNoscript', + "-default": 'startTagOther' + }; + + modes.inHeadNoscript.end_tag_handlers = { + noscript: 'endTagNoscript', + br: 'endTagBr', + '-default': 'endTagOther' + }; + + /* Character data: leading whitespace is inserted; anything else is a parse error that pops the noscript element (anythingElse) and is reprocessed by the resulting mode. */ + modes.inHeadNoscript.processCharacters = function(buffer) { + var leadingWhitespace = buffer.takeLeadingWhitespace(); + if (leadingWhitespace) + tree.insertText(leadingWhitespace); + if (!buffer.length) + return; + // FIXME error message + tree.parseError("unexpected-char-in-frameset"); + this.anythingElse(); + tree.insertionMode.processCharacters(buffer); + }; + + modes.inHeadNoscript.processComment
= function(data) { + modes.inHead.processComment(data); + }; + + /* "html" start tag is processed with the inBody rules. */ + modes.inHeadNoscript.startTagHtml = function(name, attributes) { + modes.inBody.processStartTag(name, attributes); + }; + + /* Metadata start tags allowed inside noscript are processed with the inHead rules. */ + modes.inHeadNoscript.startTagBasefontBgsoundLinkMetaNoframesStyle = function(name, attributes) { + modes.inHead.processStartTag(name, attributes); + }; + + /* head/noscript start tag: parse error only. */ + modes.inHeadNoscript.startTagHeadNoscript = function(name, attributes) { + // FIXME error message + tree.parseError("unexpected-start-tag-in-frameset", {name: name}); + }; + + /* Any other start tag: parse error, the noscript element is popped (anythingElse) and the tag is reprocessed by the resulting mode. */ + modes.inHeadNoscript.startTagOther = function(name, attributes) { + // FIXME error message + tree.parseError("unexpected-start-tag-in-frameset", {name: name}); + this.anythingElse(); + tree.insertionMode.processStartTag(name, attributes); + }; + + /* "br" end tag: parse error, the noscript element is popped and the tag is reprocessed by the resulting mode. */ + modes.inHeadNoscript.endTagBr = function(name, attributes) { + // FIXME error message + tree.parseError("unexpected-end-tag-in-frameset", {name: name}); + this.anythingElse(); + tree.insertionMode.processEndTag(name, attributes); + }; + + /* "noscript" end tag: pops the current element and returns to 'inHead'. */ + modes.inHeadNoscript.endTagNoscript = function(name, attributes) { + tree.popElement(); + tree.setInsertionMode('inHead'); + }; + + /* Any other end tag: parse error only. */ + modes.inHeadNoscript.endTagOther = function(name, attributes) { + // FIXME error message + tree.parseError("unexpected-end-tag-in-frameset", {name: name}); + }; + + /* Shared fallback: pops the current element and returns to 'inHead'; callers reprocess the token afterwards. */ + modes.inHeadNoscript.anythingElse = function() { + tree.popElement(); + tree.setInsertionMode('inHead'); + }; + + + /* 'inFrameset' insertion mode; inherits from modes.base. */ + modes.inFrameset = Object.create(modes.base); + + modes.inFrameset.start_tag_handlers = { + html: 'startTagHtml', + frameset: 'startTagFrameset', + frame: 'startTagFrame', + noframes: 'startTagNoframes', + "-default": 'startTagOther' + }; + + modes.inFrameset.end_tag_handlers = { + frameset: 'endTagFrameset', + noframes: 'endTagNoframes', + '-default': 'endTagOther' + }; + + /* Character data inside a frameset: a parse error is recorded and the data is discarded; the argument is not inspected. */ + modes.inFrameset.processCharacters = function(data) { + tree.parseError("unexpected-char-in-frameset"); + }; + + modes.inFrameset.startTagFrameset = function(name, attributes) { +
tree.insertElement(name, attributes); + }; + + /* "frame" start tag: inserted as a self-closing element. */ + modes.inFrameset.startTagFrame = function(name, attributes) { + tree.insertSelfClosingElement(name, attributes); + }; + + /* "noframes" start tag is processed with the inBody rules. */ + modes.inFrameset.startTagNoframes = function(name, attributes) { + modes.inBody.processStartTag(name, attributes); + }; + + /* Any other start tag: parse error only. */ + modes.inFrameset.startTagOther = function(name, attributes) { + tree.parseError("unexpected-start-tag-in-frameset", {name: name}); + }; + + /* "frameset" end tag: a parse error when the current item is the html element, otherwise the current element is popped. Outside context (fragment) parsing, the mode switches to 'afterFrameset' once the current item is no longer a frameset. */ + modes.inFrameset.endTagFrameset = function(name, attributes) { + if (tree.currentStackItem().localName == 'html') { + // context case + tree.parseError("unexpected-frameset-in-frameset-innerhtml"); + } else { + tree.popElement(); + } + + if (!tree.context && tree.currentStackItem().localName != 'frameset') { + // If we're not in context mode an the current node is not a "frameset" element (anymore) then switch + tree.setInsertionMode('afterFrameset'); + } + }; + + /* "noframes" end tag is processed with the inBody rules. */ + modes.inFrameset.endTagNoframes = function(name) { + modes.inBody.processEndTag(name); + }; + + /* Any other end tag: parse error only. */ + modes.inFrameset.endTagOther = function(name) { + tree.parseError("unexpected-end-tag-in-frameset", {name: name}); + }; + + /* 'inTable' insertion mode; inherits from modes.base. */ + modes.inTable = Object.create(modes.base); + + modes.inTable.start_tag_handlers = { + html: 'startTagHtml', + caption: 'startTagCaption', + colgroup: 'startTagColgroup', + col: 'startTagCol', + table: 'startTagTable', + tbody: 'startTagRowGroup', + tfoot: 'startTagRowGroup', + thead: 'startTagRowGroup', + td: 'startTagImplyTbody', + th: 'startTagImplyTbody', + tr: 'startTagImplyTbody', + style: 'startTagStyleScript', + script: 'startTagStyleScript', + input: 'startTagInput', + form: 'startTagForm', + '-default': 'startTagOther' + }; + + modes.inTable.end_tag_handlers = { + table: 'endTagTable', + body: 'endTagIgnore', + caption: 'endTagIgnore', + col: 'endTagIgnore', + colgroup: 'endTagIgnore', + html: 'endTagIgnore', + tbody: 'endTagIgnore', + td: 'endTagIgnore', + tfoot: 'endTagIgnore', + th: 'endTagIgnore', + thead: 'endTagIgnore', + tr: 'endTagIgnore', +
'-default': 'endTagOther' + }; + + /* Character data directly inside table structure. When the current item is foster-parenting, the mode switches to 'inTableText' (remembering the mode to return to in tree.originalInsertionMode) and the data is processed there; otherwise the data is processed with the inBody rules while tree.redirectAttachToFosterParent is set. */ + modes.inTable.processCharacters = function(data) { + if (tree.currentStackItem().isFosterParenting()) { + var originalInsertionMode = tree.insertionModeName; + tree.setInsertionMode('inTableText'); + tree.originalInsertionMode = originalInsertionMode; + tree.insertionMode.processCharacters(data); + } else { + tree.redirectAttachToFosterParent = true; + modes.inBody.processCharacters(data); + tree.redirectAttachToFosterParent = false; + } + }; + + /* "caption" start tag: pops back to the table scope marker, pushes a Marker onto the active formatting elements, inserts the element and switches to 'inCaption'. */ + modes.inTable.startTagCaption = function(name, attributes) { + tree.openElements.popUntilTableScopeMarker(); + tree.activeFormattingElements.push(Marker); + tree.insertElement(name, attributes); + tree.setInsertionMode('inCaption'); + }; + + /* "colgroup" start tag: pops back to the table scope marker, inserts the element and switches to 'inColumnGroup'. */ + modes.inTable.startTagColgroup = function(name, attributes) { + tree.openElements.popUntilTableScopeMarker(); + tree.insertElement(name, attributes); + tree.setInsertionMode('inColumnGroup'); + }; + + /* "col" start tag: an attribute-less colgroup is implied first, then the tag is reprocessed by the resulting mode. */ + modes.inTable.startTagCol = function(name, attributes) { + this.startTagColgroup('colgroup', []); + tree.insertionMode.processStartTag(name, attributes); + }; + + /* tbody/tfoot/thead start tag: pops back to the table scope marker, inserts the element and switches to 'inTableBody'. */ + modes.inTable.startTagRowGroup = function(name, attributes) { + tree.openElements.popUntilTableScopeMarker(); + tree.insertElement(name, attributes); + tree.setInsertionMode('inTableBody'); + }; + + /* td/th/tr start tag: an attribute-less tbody is implied first, then the tag is reprocessed by the resulting mode. */ + modes.inTable.startTagImplyTbody = function(name, attributes) { + this.startTagRowGroup('tbody', []); + tree.insertionMode.processStartTag(name, attributes); + }; + + /* Nested "table" start tag: reported, an implied table end tag is processed, and (outside context parsing) the start tag is reprocessed by the resulting mode. */ + modes.inTable.startTagTable = function(name, attributes) { + tree.parseError("unexpected-start-tag-implies-end-tag", + {startName: "table", endName: "table"}); + tree.insertionMode.processEndTag('table'); + if (!tree.context) tree.insertionMode.processStartTag(name, attributes); + }; + + /* style/script start tags are processed with the inHead rules. */ + modes.inTable.startTagStyleScript = function(name, attributes) { + modes.inHead.processStartTag(name, attributes); + }; + + modes.inTable.startTagInput = function(name, attributes) { + for (var key in attributes) { + if
(attributes[key].nodeName.toLowerCase() == 'type') { + if (attributes[key].nodeValue.toLowerCase() == 'hidden') { + tree.parseError("unexpected-hidden-input-in-table"); + tree.insertElement(name, attributes); + // XXX associate with form + tree.openElements.pop(); + return; + } + break; + } + } + this.startTagOther(name, attributes); + }; + + /* "form" start tag inside a table: always a parse error. When no form is current, the element is inserted, recorded in tree.form and immediately popped from the open-element stack. */ + modes.inTable.startTagForm = function(name, attributes) { + tree.parseError("unexpected-form-in-table"); + if (!tree.form) { + tree.insertElement(name, attributes); + tree.form = tree.currentStackItem(); + tree.openElements.pop(); + } + }; + + /* Any other start tag: reported, then processed with the inBody rules while tree.redirectAttachToFosterParent is set. */ + modes.inTable.startTagOther = function(name, attributes, selfClosing) { + tree.parseError("unexpected-start-tag-implies-table-voodoo", {name: name}); + tree.redirectAttachToFosterParent = true; + modes.inBody.processStartTag(name, attributes, selfClosing); + tree.redirectAttachToFosterParent = false; + }; + + /* "table" end tag: when the table is in table scope, implied end tags are generated, a mismatch with the current item is reported, the stack is popped until 'table' is popped and the insertion mode is reset; otherwise only a parse error is recorded (the code asserts tree.context in that case). */ + modes.inTable.endTagTable = function(name) { + if (tree.openElements.inTableScope(name)) { + tree.generateImpliedEndTags(); + if (tree.currentStackItem().localName != name) { + tree.parseError("end-tag-too-early-named", {gotName: 'table', expectedName: tree.currentStackItem().localName}); + } + + tree.openElements.popUntilPopped('table'); + tree.resetInsertionMode(); + } else { + assert.ok(tree.context); + tree.parseError('unexpected-end-tag', {name: name}); + } + }; + + /* End tag that is simply a parse error inside a table. */ + modes.inTable.endTagIgnore = function(name) { + tree.parseError("unexpected-end-tag", {name: name}); + }; + + /* Any other end tag: reported, then processed with the inBody rules while tree.redirectAttachToFosterParent is set. */ + modes.inTable.endTagOther = function(name) { + tree.parseError("unexpected-end-tag-implies-table-voodoo", {name: name}); + // Make all the special element rearranging voodoo kick in + tree.redirectAttachToFosterParent = true; + // Process the end tag in the "in body" mode + modes.inBody.processEndTag(name); + tree.redirectAttachToFosterParent = false; + }; + + /* 'inTableText' insertion mode; inherits from modes.base. Character data is buffered in tree.pendingTableCharacters and flushed when any other token arrives. */ + modes.inTableText = Object.create(modes.base); + + modes.inTableText.flushCharacters = function() { + var characters =
tree.pendingTableCharacters.join(''); + if (!isAllWhitespace(characters)) { + tree.redirectAttachToFosterParent = true; + tree.reconstructActiveFormattingElements(); + tree.insertText(characters); + tree.framesetOk = false; + tree.redirectAttachToFosterParent = false; + } else { + tree.insertText(characters); + } + tree.pendingTableCharacters = []; + }; + + /* Comment token: flushes the pending text, restores the original insertion mode and reprocesses the comment there. */ + modes.inTableText.processComment = function(data) { + this.flushCharacters(); + tree.setInsertionMode(tree.originalInsertionMode); + tree.insertionMode.processComment(data); + }; + + /* End of file: flushes the pending text, restores the original insertion mode and forwards the EOF (the data argument is not passed on). */ + modes.inTableText.processEOF = function(data) { + this.flushCharacters(); + tree.setInsertionMode(tree.originalInsertionMode); + tree.insertionMode.processEOF(); + }; + + /* Character data: every U+0000 is reported and dropped; a non-empty remainder is appended to tree.pendingTableCharacters. */ + modes.inTableText.processCharacters = function(buffer) { + var characters = buffer.takeRemaining(); + characters = characters.replace(/\u0000/g, function(match, index){ + // @todo position + tree.parseError("invalid-codepoint"); + return ''; + }); + if (!characters) + return; + tree.pendingTableCharacters.push(characters); + }; + + /* Start tag: flushes the pending text, restores the original insertion mode and reprocesses the tag there. */ + modes.inTableText.processStartTag = function(name, attributes, selfClosing) { + this.flushCharacters(); + tree.setInsertionMode(tree.originalInsertionMode); + tree.insertionMode.processStartTag(name, attributes, selfClosing); + }; + + /* End tag: flushes the pending text, restores the original insertion mode and reprocesses the tag there. */ + modes.inTableText.processEndTag = function(name, attributes) { + this.flushCharacters(); + tree.setInsertionMode(tree.originalInsertionMode); + tree.insertionMode.processEndTag(name, attributes); + }; + + /* 'inTableBody' insertion mode; inherits from modes.base. */ + modes.inTableBody = Object.create(modes.base); + + modes.inTableBody.start_tag_handlers = { + html: 'startTagHtml', + tr: 'startTagTr', + td: 'startTagTableCell', + th: 'startTagTableCell', + caption: 'startTagTableOther', + col: 'startTagTableOther', + colgroup: 'startTagTableOther', + tbody: 'startTagTableOther', + tfoot: 'startTagTableOther', + thead: 'startTagTableOther', + '-default': 'startTagOther' + }; + + modes.inTableBody.end_tag_handlers = { + table: 'endTagTable', + tbody:
'endTagTableRowGroup', + tfoot: 'endTagTableRowGroup', + thead: 'endTagTableRowGroup', + body: 'endTagIgnore', + caption: 'endTagIgnore', + col: 'endTagIgnore', + colgroup: 'endTagIgnore', + html: 'endTagIgnore', + td: 'endTagIgnore', + th: 'endTagIgnore', + tr: 'endTagIgnore', + '-default': 'endTagOther' + }; + + /* Character data is processed with the inTable rules. */ + modes.inTableBody.processCharacters = function(data) { + modes.inTable.processCharacters(data); + }; + + /* "tr" start tag: pops back to the table-body scope marker, inserts the row and switches to 'inRow'. */ + modes.inTableBody.startTagTr = function(name, attributes) { + tree.openElements.popUntilTableBodyScopeMarker(); + tree.insertElement(name, attributes); + tree.setInsertionMode('inRow'); + }; + + /* td/th start tag directly in a row group: reported, an attribute-less tr is implied, then the cell tag is reprocessed by the resulting mode. */ + modes.inTableBody.startTagTableCell = function(name, attributes) { + tree.parseError("unexpected-cell-in-table-body", {name: name}); + this.startTagTr('tr', []); + tree.insertionMode.processStartTag(name, attributes); + }; + + /* Table-structure start tag: when a tbody, thead or tfoot is in table scope, the stack is popped back to the table-body scope marker, the current row group is closed through endTagTableRowGroup and the tag is reprocessed; otherwise it is only reported. */ + modes.inTableBody.startTagTableOther = function(name, attributes) { + // XXX any ideas on how to share this with endTagTable + if (tree.openElements.inTableScope('tbody') || tree.openElements.inTableScope('thead') || tree.openElements.inTableScope('tfoot')) { + tree.openElements.popUntilTableBodyScopeMarker(); + this.endTagTableRowGroup(tree.currentStackItem().localName); + tree.insertionMode.processStartTag(name, attributes); + } else { + // context case + tree.parseError('unexpected-start-tag', {name: name}); + } + }; + + /* Any other start tag is processed with the inTable rules. */ + modes.inTableBody.startTagOther = function(name, attributes) { + modes.inTable.processStartTag(name, attributes); + }; + + /* tbody/tfoot/thead end tag: when the named element is in table scope, pops back to the table-body scope marker, pops the row group itself and returns to 'inTable'; otherwise a parse error. */ + modes.inTableBody.endTagTableRowGroup = function(name) { + if (tree.openElements.inTableScope(name)) { + tree.openElements.popUntilTableBodyScopeMarker(); + tree.popElement(); + tree.setInsertionMode('inTable'); + } else { + tree.parseError('unexpected-end-tag-in-table-body', {name: name}); + } + }; + + modes.inTableBody.endTagTable = function(name) { + if (tree.openElements.inTableScope('tbody') || tree.openElements.inTableScope('thead') || tree.openElements.inTableScope('tfoot')) { +
tree.openElements.popUntilTableBodyScopeMarker(); + this.endTagTableRowGroup(tree.currentStackItem().localName); + tree.insertionMode.processEndTag(name); + } else { + // context case + tree.parseError('unexpected-end-tag', {name: name}); + } + }; + + /* End tag that is simply a parse error inside a row group. */ + modes.inTableBody.endTagIgnore = function(name) { + tree.parseError("unexpected-end-tag-in-table-body", {name: name}); + }; + + /* Any other end tag is processed with the inTable rules. */ + modes.inTableBody.endTagOther = function(name) { + modes.inTable.processEndTag(name); + }; + + /* 'inSelect' insertion mode; inherits from modes.base. */ + modes.inSelect = Object.create(modes.base); + + modes.inSelect.start_tag_handlers = { + html: 'startTagHtml', + option: 'startTagOption', + optgroup: 'startTagOptgroup', + select: 'startTagSelect', + input: 'startTagInput', + keygen: 'startTagInput', + textarea: 'startTagInput', + script: 'startTagScript', + '-default': 'startTagOther' + }; + + modes.inSelect.end_tag_handlers = { + option: 'endTagOption', + optgroup: 'endTagOptgroup', + select: 'endTagSelect', + caption: 'endTagTableElements', + table: 'endTagTableElements', + tbody: 'endTagTableElements', + tfoot: 'endTagTableElements', + thead: 'endTagTableElements', + tr: 'endTagTableElements', + td: 'endTagTableElements', + th: 'endTagTableElements', + '-default': 'endTagOther' + }; + + /* Character data inside a select: every U+0000 is reported and dropped; a non-empty remainder is inserted as text. */ + modes.inSelect.processCharacters = function(buffer) { + var data = buffer.takeRemaining(); + data = data.replace(/\u0000/g, function(match, index){ + // @todo position + tree.parseError("invalid-codepoint"); + return ''; + }); + if (!data) + return; + tree.insertText(data); + }; + + modes.inSelect.startTagOption = function(name, attributes) { + // we need to imply if