diff options
author | Florian Dold <florian.dold@gmail.com> | 2016-11-16 01:59:39 +0100 |
---|---|---|
committer | Florian Dold <florian.dold@gmail.com> | 2016-11-16 02:00:31 +0100 |
commit | bd65bb67e25a79b019d745b7262b2008ce2adb15 (patch) | |
tree | 89e1b032103a63737f1a703e6a943832ef261704 /node_modules/sax/lib | |
parent | f91466595b651721690133f58ab37f977539e95b (diff) |
incrementally verify denoms
The denominations are not stored in a separate object store.
Diffstat (limited to 'node_modules/sax/lib')
-rw-r--r-- | node_modules/sax/lib/sax.js | 2758 |
1 files changed, 1462 insertions, 1296 deletions
diff --git a/node_modules/sax/lib/sax.js b/node_modules/sax/lib/sax.js index 410a50748..f125c5fee 100644 --- a/node_modules/sax/lib/sax.js +++ b/node_modules/sax/lib/sax.js @@ -1,1410 +1,1576 @@ -// wrapper for non-node envs -;(function (sax) { - -sax.parser = function (strict, opt) { return new SAXParser(strict, opt) } -sax.SAXParser = SAXParser -sax.SAXStream = SAXStream -sax.createStream = createStream - -// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. -// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), -// since that's the earliest that a buffer overrun could occur. This way, checks are -// as rare as required, but as often as necessary to ensure never crossing this bound. -// Furthermore, buffers are only tested at most once per write(), so passing a very -// large string into write() might have undesirable effects, but this is manageable by -// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme -// edge case, result in creating at most one complete copy of the string passed in. -// Set to Infinity to have unlimited buffers. -sax.MAX_BUFFER_LENGTH = 64 * 1024 - -var buffers = [ - "comment", "sgmlDecl", "textNode", "tagName", "doctype", - "procInstName", "procInstBody", "entity", "attribName", - "attribValue", "cdata", "script" -] - -sax.EVENTS = // for discoverability. - [ "text" - , "processinginstruction" - , "sgmldeclaration" - , "doctype" - , "comment" - , "attribute" - , "opentag" - , "closetag" - , "opencdata" - , "cdata" - , "closecdata" - , "error" - , "end" - , "ready" - , "script" - , "opennamespace" - , "closenamespace" +;(function (sax) { // wrapper for non-node envs + sax.parser = function (strict, opt) { return new SAXParser(strict, opt) } + sax.SAXParser = SAXParser + sax.SAXStream = SAXStream + sax.createStream = createStream + + // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. + // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), + // since that's the earliest that a buffer overrun could occur. This way, checks are + // as rare as required, but as often as necessary to ensure never crossing this bound. + // Furthermore, buffers are only tested at most once per write(), so passing a very + // large string into write() might have undesirable effects, but this is manageable by + // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme + // edge case, result in creating at most one complete copy of the string passed in. + // Set to Infinity to have unlimited buffers. + sax.MAX_BUFFER_LENGTH = 64 * 1024 + + var buffers = [ + 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype', + 'procInstName', 'procInstBody', 'entity', 'attribName', + 'attribValue', 'cdata', 'script' ] -function SAXParser (strict, opt) { - if (!(this instanceof SAXParser)) return new SAXParser(strict, opt) - - var parser = this - clearBuffers(parser) - parser.q = parser.c = "" - parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH - parser.opt = opt || {} - parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags - parser.looseCase = parser.opt.lowercase ? "toLowerCase" : "toUpperCase" - parser.tags = [] - parser.closed = parser.closedRoot = parser.sawRoot = false - parser.tag = parser.error = null - parser.strict = !!strict - parser.noscript = !!(strict || parser.opt.noscript) - parser.state = S.BEGIN - parser.ENTITIES = Object.create(sax.ENTITIES) - parser.attribList = [] - - // namespaces form a prototype chain. - // it always points at the current tag, - // which protos to its parent tag. - if (parser.opt.xmlns) parser.ns = Object.create(rootNS) - - // mostly just for error reporting - parser.trackPosition = parser.opt.position !== false - if (parser.trackPosition) { - parser.position = parser.line = parser.column = 0 - } - emit(parser, "onready") -} - -if (!Object.create) Object.create = function (o) { - function f () { this.__proto__ = o } - f.prototype = o - return new f -} - -if (!Object.getPrototypeOf) Object.getPrototypeOf = function (o) { - return o.__proto__ -} - -if (!Object.keys) Object.keys = function (o) { - var a = [] - for (var i in o) if (o.hasOwnProperty(i)) a.push(i) - return a -} - -function checkBufferLength (parser) { - var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10) - , maxActual = 0 - for (var i = 0, l = buffers.length; i < l; i ++) { - var len = parser[buffers[i]].length - if (len > maxAllowed) { - // Text/cdata nodes can get big, and since they're buffered, - // we can get here under normal conditions. - // Avoid issues by emitting the text node now, - // so at least it won't get any bigger. - switch (buffers[i]) { - case "textNode": - closeText(parser) - break + sax.EVENTS = [ + 'text', + 'processinginstruction', + 'sgmldeclaration', + 'doctype', + 'comment', + 'opentagstart', + 'attribute', + 'opentag', + 'closetag', + 'opencdata', + 'cdata', + 'closecdata', + 'error', + 'end', + 'ready', + 'script', + 'opennamespace', + 'closenamespace' + ] - case "cdata": - emitNode(parser, "oncdata", parser.cdata) - parser.cdata = "" - break + function SAXParser (strict, opt) { + if (!(this instanceof SAXParser)) { + return new SAXParser(strict, opt) + } - case "script": - emitNode(parser, "onscript", parser.script) - parser.script = "" - break + var parser = this + clearBuffers(parser) + parser.q = parser.c = '' + parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH + parser.opt = opt || {} + parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags + parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase' + parser.tags = [] + parser.closed = parser.closedRoot = parser.sawRoot = false + parser.tag = parser.error = null + parser.strict = !!strict + parser.noscript = !!(strict || parser.opt.noscript) + parser.state = S.BEGIN + parser.strictEntities = parser.opt.strictEntities + parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES) + parser.attribList = [] + + // namespaces form a prototype chain. + // it always points at the current tag, + // which protos to its parent tag. + if (parser.opt.xmlns) { + parser.ns = Object.create(rootNS) + } - default: - error(parser, "Max buffer length exceeded: "+buffers[i]) - } + // mostly just for error reporting + parser.trackPosition = parser.opt.position !== false + if (parser.trackPosition) { + parser.position = parser.line = parser.column = 0 } - maxActual = Math.max(maxActual, len) - } - // schedule the next check for the earliest possible buffer overrun. - parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) - + parser.position -} - -function clearBuffers (parser) { - for (var i = 0, l = buffers.length; i < l; i ++) { - parser[buffers[i]] = "" + emit(parser, 'onready') } -} -function flushBuffers (parser) { - closeText(parser) - if (parser.cdata !== "") { - emitNode(parser, "oncdata", parser.cdata) - parser.cdata = "" + if (!Object.create) { + Object.create = function (o) { + function F () {} + F.prototype = o + var newf = new F() + return newf + } } - if (parser.script !== "") { - emitNode(parser, "onscript", parser.script) - parser.script = "" + + if (!Object.keys) { + Object.keys = function (o) { + var a = [] + for (var i in o) if (o.hasOwnProperty(i)) a.push(i) + return a + } } -} - -SAXParser.prototype = - { end: function () { end(this) } - , write: write - , resume: function () { this.error = null; return this } - , close: function () { return this.write(null) } - , flush: function () { flushBuffers(this) } + + function checkBufferLength (parser) { + var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10) + var maxActual = 0 + for (var i = 0, l = buffers.length; i < l; i++) { + var len = parser[buffers[i]].length + if (len > maxAllowed) { + // Text/cdata nodes can get big, and since they're buffered, + // we can get here under normal conditions. + // Avoid issues by emitting the text node now, + // so at least it won't get any bigger. + switch (buffers[i]) { + case 'textNode': + closeText(parser) + break + + case 'cdata': + emitNode(parser, 'oncdata', parser.cdata) + parser.cdata = '' + break + + case 'script': + emitNode(parser, 'onscript', parser.script) + parser.script = '' + break + + default: + error(parser, 'Max buffer length exceeded: ' + buffers[i]) + } + } + maxActual = Math.max(maxActual, len) + } + // schedule the next check for the earliest possible buffer overrun. + var m = sax.MAX_BUFFER_LENGTH - maxActual + parser.bufferCheckPosition = m + parser.position } -try { - var Stream = require("stream").Stream -} catch (ex) { - var Stream = function () {} -} + function clearBuffers (parser) { + for (var i = 0, l = buffers.length; i < l; i++) { + parser[buffers[i]] = '' + } + } + function flushBuffers (parser) { + closeText(parser) + if (parser.cdata !== '') { + emitNode(parser, 'oncdata', parser.cdata) + parser.cdata = '' + } + if (parser.script !== '') { + emitNode(parser, 'onscript', parser.script) + parser.script = '' + } + } -var streamWraps = sax.EVENTS.filter(function (ev) { - return ev !== "error" && ev !== "end" -}) + SAXParser.prototype = { + end: function () { end(this) }, + write: write, + resume: function () { this.error = null; return this }, + close: function () { return this.write(null) }, + flush: function () { flushBuffers(this) } + } -function createStream (strict, opt) { - return new SAXStream(strict, opt) -} + var Stream + try { + Stream = require('stream').Stream + } catch (ex) { + Stream = function () {} + } -function SAXStream (strict, opt) { - if (!(this instanceof SAXStream)) return new SAXStream(strict, opt) + var streamWraps = sax.EVENTS.filter(function (ev) { + return ev !== 'error' && ev !== 'end' + }) - Stream.apply(this) + function createStream (strict, opt) { + return new SAXStream(strict, opt) + } - this._parser = new SAXParser(strict, opt) - this.writable = true - this.readable = true + function SAXStream (strict, opt) { + if (!(this instanceof SAXStream)) { + return new SAXStream(strict, opt) + } + Stream.apply(this) - var me = this + this._parser = new SAXParser(strict, opt) + this.writable = true + this.readable = true - this._parser.onend = function () { - me.emit("end") - } + var me = this - this._parser.onerror = function (er) { - me.emit("error", er) + this._parser.onend = function () { + me.emit('end') + } - // if didn't throw, then means error was handled. - // go ahead and clear error, so we can write again. - me._parser.error = null - } + this._parser.onerror = function (er) { + me.emit('error', er) - this._decoder = null; + // if didn't throw, then means error was handled. + // go ahead and clear error, so we can write again. + me._parser.error = null + } - streamWraps.forEach(function (ev) { - Object.defineProperty(me, "on" + ev, { - get: function () { return me._parser["on" + ev] }, - set: function (h) { - if (!h) { - me.removeAllListeners(ev) - return me._parser["on"+ev] = h - } - me.on(ev, h) - }, - enumerable: true, - configurable: false + this._decoder = null + + streamWraps.forEach(function (ev) { + Object.defineProperty(me, 'on' + ev, { + get: function () { + return me._parser['on' + ev] + }, + set: function (h) { + if (!h) { + me.removeAllListeners(ev) + me._parser['on' + ev] = h + return h + } + me.on(ev, h) + }, + enumerable: true, + configurable: false + }) }) - }) -} + } -SAXStream.prototype = Object.create(Stream.prototype, - { constructor: { value: SAXStream } }) + SAXStream.prototype = Object.create(Stream.prototype, { + constructor: { + value: SAXStream + } + }) -SAXStream.prototype.write = function (data) { - if (typeof Buffer === 'function' && + SAXStream.prototype.write = function (data) { + if (typeof Buffer === 'function' && typeof Buffer.isBuffer === 'function' && Buffer.isBuffer(data)) { - if (!this._decoder) { - var SD = require('string_decoder').StringDecoder - this._decoder = new SD('utf8') + if (!this._decoder) { + var SD = require('string_decoder').StringDecoder + this._decoder = new SD('utf8') + } + data = this._decoder.write(data) } - data = this._decoder.write(data); + + this._parser.write(data.toString()) + this.emit('data', data) + return true } - this._parser.write(data.toString()) - this.emit("data", data) - return true -} - -SAXStream.prototype.end = function (chunk) { - if (chunk && chunk.length) this.write(chunk) - this._parser.end() - return true -} - -SAXStream.prototype.on = function (ev, handler) { - var me = this - if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) { - me._parser["on"+ev] = function () { - var args = arguments.length === 1 ? [arguments[0]] - : Array.apply(null, arguments) - args.splice(0, 0, ev) - me.emit.apply(me, args) + SAXStream.prototype.end = function (chunk) { + if (chunk && chunk.length) { + this.write(chunk) } + this._parser.end() + return true } - return Stream.prototype.on.call(me, ev, handler) -} + SAXStream.prototype.on = function (ev, handler) { + var me = this + if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) { + me._parser['on' + ev] = function () { + var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments) + args.splice(0, 0, ev) + me.emit.apply(me, args) + } + } + return Stream.prototype.on.call(me, ev, handler) + } + // character classes and tokens + var whitespace = '\r\n\t ' -// character classes and tokens -var whitespace = "\r\n\t " // this really needs to be replaced with character classes. // XML allows all manner of ridiculous numbers and digits. - , number = "0124356789" - , letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + var number = '0124356789' + var letter = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + // (Letter | "_" | ":") - , quote = "'\"" - , entity = number+letter+"#" - , attribEnd = whitespace + ">" - , CDATA = "[CDATA[" - , DOCTYPE = "DOCTYPE" - , XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace" - , XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/" - , rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE } - -// turn all the string character sets into character class objects. -whitespace = charClass(whitespace) -number = charClass(number) -letter = charClass(letter) - -// http://www.w3.org/TR/REC-xml/#NT-NameStartChar -// This implementation works on strings, a single character at a time -// as such, it cannot ever support astral-plane characters (10000-EFFFF) -// without a significant breaking change to either this parser, or the -// JavaScript language. Implementation of an emoji-capable xml parser -// is left as an exercise for the reader. -var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ - -var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/ - -quote = charClass(quote) -entity = charClass(entity) -attribEnd = charClass(attribEnd) - -function charClass (str) { - return str.split("").reduce(function (s, c) { - s[c] = true - return s - }, {}) -} - -function isRegExp (c) { - return Object.prototype.toString.call(c) === '[object RegExp]' -} - -function is (charclass, c) { - return isRegExp(charclass) ? !!c.match(charclass) : charclass[c] -} - -function not (charclass, c) { - return !is(charclass, c) -} - -var S = 0 -sax.STATE = -{ BEGIN : S++ -, TEXT : S++ // general stuff -, TEXT_ENTITY : S++ // & and such. -, OPEN_WAKA : S++ // < -, SGML_DECL : S++ // <!BLARG -, SGML_DECL_QUOTED : S++ // <!BLARG foo "bar -, DOCTYPE : S++ // <!DOCTYPE -, DOCTYPE_QUOTED : S++ // <!DOCTYPE "//blah -, DOCTYPE_DTD : S++ // <!DOCTYPE "//blah" [ ... -, DOCTYPE_DTD_QUOTED : S++ // <!DOCTYPE "//blah" [ "foo -, COMMENT_STARTING : S++ // <!- -, COMMENT : S++ // <!-- -, COMMENT_ENDING : S++ // <!-- blah - -, COMMENT_ENDED : S++ // <!-- blah -- -, CDATA : S++ // <![CDATA[ something -, CDATA_ENDING : S++ // ] -, CDATA_ENDING_2 : S++ // ]] -, PROC_INST : S++ // <?hi -, PROC_INST_BODY : S++ // <?hi there -, PROC_INST_ENDING : S++ // <?hi "there" ? -, OPEN_TAG : S++ // <strong -, OPEN_TAG_SLASH : S++ // <strong / -, ATTRIB : S++ // <a -, ATTRIB_NAME : S++ // <a foo -, ATTRIB_NAME_SAW_WHITE : S++ // <a foo _ -, ATTRIB_VALUE : S++ // <a foo= -, ATTRIB_VALUE_QUOTED : S++ // <a foo="bar -, ATTRIB_VALUE_CLOSED : S++ // <a foo="bar" -, ATTRIB_VALUE_UNQUOTED : S++ // <a foo=bar -, ATTRIB_VALUE_ENTITY_Q : S++ // <foo bar=""" -, ATTRIB_VALUE_ENTITY_U : S++ // <foo bar=" -, CLOSE_TAG : S++ // </a -, CLOSE_TAG_SAW_WHITE : S++ // </a > -, SCRIPT : S++ // <script> ... -, SCRIPT_ENDING : S++ // <script> ... < -} - -sax.ENTITIES = -{ "amp" : "&" -, "gt" : ">" -, "lt" : "<" -, "quot" : "\"" -, "apos" : "'" -, "AElig" : 198 -, "Aacute" : 193 -, "Acirc" : 194 -, "Agrave" : 192 -, "Aring" : 197 -, "Atilde" : 195 -, "Auml" : 196 -, "Ccedil" : 199 -, "ETH" : 208 -, "Eacute" : 201 -, "Ecirc" : 202 -, "Egrave" : 200 -, "Euml" : 203 -, "Iacute" : 205 -, "Icirc" : 206 -, "Igrave" : 204 -, "Iuml" : 207 -, "Ntilde" : 209 -, "Oacute" : 211 -, "Ocirc" : 212 -, "Ograve" : 210 -, "Oslash" : 216 -, "Otilde" : 213 -, "Ouml" : 214 -, "THORN" : 222 -, "Uacute" : 218 -, "Ucirc" : 219 -, "Ugrave" : 217 -, "Uuml" : 220 -, "Yacute" : 221 -, "aacute" : 225 -, "acirc" : 226 -, "aelig" : 230 -, "agrave" : 224 -, "aring" : 229 -, "atilde" : 227 -, "auml" : 228 -, "ccedil" : 231 -, "eacute" : 233 -, "ecirc" : 234 -, "egrave" : 232 -, "eth" : 240 -, "euml" : 235 -, "iacute" : 237 -, "icirc" : 238 -, "igrave" : 236 -, "iuml" : 239 -, "ntilde" : 241 -, "oacute" : 243 -, "ocirc" : 244 -, "ograve" : 242 -, "oslash" : 248 -, "otilde" : 245 -, "ouml" : 246 -, "szlig" : 223 -, "thorn" : 254 -, "uacute" : 250 -, "ucirc" : 251 -, "ugrave" : 249 -, "uuml" : 252 -, "yacute" : 253 -, "yuml" : 255 -, "copy" : 169 -, "reg" : 174 -, "nbsp" : 160 -, "iexcl" : 161 -, "cent" : 162 -, "pound" : 163 -, "curren" : 164 -, "yen" : 165 -, "brvbar" : 166 -, "sect" : 167 -, "uml" : 168 -, "ordf" : 170 -, "laquo" : 171 -, "not" : 172 -, "shy" : 173 -, "macr" : 175 -, "deg" : 176 -, "plusmn" : 177 -, "sup1" : 185 -, "sup2" : 178 -, "sup3" : 179 -, "acute" : 180 -, "micro" : 181 -, "para" : 182 -, "middot" : 183 -, "cedil" : 184 -, "ordm" : 186 -, "raquo" : 187 -, "frac14" : 188 -, "frac12" : 189 -, "frac34" : 190 -, "iquest" : 191 -, "times" : 215 -, "divide" : 247 -, "OElig" : 338 -, "oelig" : 339 -, "Scaron" : 352 -, "scaron" : 353 -, "Yuml" : 376 -, "fnof" : 402 -, "circ" : 710 -, "tilde" : 732 -, "Alpha" : 913 -, "Beta" : 914 -, "Gamma" : 915 -, "Delta" : 916 -, "Epsilon" : 917 -, "Zeta" : 918 -, "Eta" : 919 -, "Theta" : 920 -, "Iota" : 921 -, "Kappa" : 922 -, "Lambda" : 923 -, "Mu" : 924 -, "Nu" : 925 -, "Xi" : 926 -, "Omicron" : 927 -, "Pi" : 928 -, "Rho" : 929 -, "Sigma" : 931 -, "Tau" : 932 -, "Upsilon" : 933 -, "Phi" : 934 -, "Chi" : 935 -, "Psi" : 936 -, "Omega" : 937 -, "alpha" : 945 -, "beta" : 946 -, "gamma" : 947 -, "delta" : 948 -, "epsilon" : 949 -, "zeta" : 950 -, "eta" : 951 -, "theta" : 952 -, "iota" : 953 -, "kappa" : 954 -, "lambda" : 955 -, "mu" : 956 -, "nu" : 957 -, "xi" : 958 -, "omicron" : 959 -, "pi" : 960 -, "rho" : 961 -, "sigmaf" : 962 -, "sigma" : 963 -, "tau" : 964 -, "upsilon" : 965 -, "phi" : 966 -, "chi" : 967 -, "psi" : 968 -, "omega" : 969 -, "thetasym" : 977 -, "upsih" : 978 -, "piv" : 982 -, "ensp" : 8194 -, "emsp" : 8195 -, "thinsp" : 8201 -, "zwnj" : 8204 -, "zwj" : 8205 -, "lrm" : 8206 -, "rlm" : 8207 -, "ndash" : 8211 -, "mdash" : 8212 -, "lsquo" : 8216 -, "rsquo" : 8217 -, "sbquo" : 8218 -, "ldquo" : 8220 -, "rdquo" : 8221 -, "bdquo" : 8222 -, "dagger" : 8224 -, "Dagger" : 8225 -, "bull" : 8226 -, "hellip" : 8230 -, "permil" : 8240 -, "prime" : 8242 -, "Prime" : 8243 -, "lsaquo" : 8249 -, "rsaquo" : 8250 -, "oline" : 8254 -, "frasl" : 8260 -, "euro" : 8364 -, "image" : 8465 -, "weierp" : 8472 -, "real" : 8476 -, "trade" : 8482 -, "alefsym" : 8501 -, "larr" : 8592 -, "uarr" : 8593 -, "rarr" : 8594 -, "darr" : 8595 -, "harr" : 8596 -, "crarr" : 8629 -, "lArr" : 8656 -, "uArr" : 8657 -, "rArr" : 8658 -, "dArr" : 8659 -, "hArr" : 8660 -, "forall" : 8704 -, "part" : 8706 -, "exist" : 8707 -, "empty" : 8709 -, "nabla" : 8711 -, "isin" : 8712 -, "notin" : 8713 -, "ni" : 8715 -, "prod" : 8719 -, "sum" : 8721 -, "minus" : 8722 -, "lowast" : 8727 -, "radic" : 8730 -, "prop" : 8733 -, "infin" : 8734 -, "ang" : 8736 -, "and" : 8743 -, "or" : 8744 -, "cap" : 8745 -, "cup" : 8746 -, "int" : 8747 -, "there4" : 8756 -, "sim" : 8764 -, "cong" : 8773 -, "asymp" : 8776 -, "ne" : 8800 -, "equiv" : 8801 -, "le" : 8804 -, "ge" : 8805 -, "sub" : 8834 -, "sup" : 8835 -, "nsub" : 8836 -, "sube" : 8838 -, "supe" : 8839 -, "oplus" : 8853 -, "otimes" : 8855 -, "perp" : 8869 -, "sdot" : 8901 -, "lceil" : 8968 -, "rceil" : 8969 -, "lfloor" : 8970 -, "rfloor" : 8971 -, "lang" : 9001 -, "rang" : 9002 -, "loz" : 9674 -, "spades" : 9824 -, "clubs" : 9827 -, "hearts" : 9829 -, "diams" : 9830 -} - -Object.keys(sax.ENTITIES).forEach(function (key) { + var quote = '\'"' + var attribEnd = whitespace + '>' + var CDATA = '[CDATA[' + var DOCTYPE = 'DOCTYPE' + var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' + var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/' + var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE } + + // turn all the string character sets into character class objects. + whitespace = charClass(whitespace) + number = charClass(number) + letter = charClass(letter) + + // http://www.w3.org/TR/REC-xml/#NT-NameStartChar + // This implementation works on strings, a single character at a time + // as such, it cannot ever support astral-plane characters (10000-EFFFF) + // without a significant breaking change to either this parser, or the + // JavaScript language. Implementation of an emoji-capable xml parser + // is left as an exercise for the reader. + var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ + + var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/ + + var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ + var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/ + + quote = charClass(quote) + attribEnd = charClass(attribEnd) + + function charClass (str) { + return str.split('').reduce(function (s, c) { + s[c] = true + return s + }, {}) + } + + function isRegExp (c) { + return Object.prototype.toString.call(c) === '[object RegExp]' + } + + function is (charclass, c) { + return isRegExp(charclass) ? !!c.match(charclass) : charclass[c] + } + + function not (charclass, c) { + return !is(charclass, c) + } + + var S = 0 + sax.STATE = { + BEGIN: S++, // leading byte order mark or whitespace + BEGIN_WHITESPACE: S++, // leading whitespace + TEXT: S++, // general stuff + TEXT_ENTITY: S++, // & and such. + OPEN_WAKA: S++, // < + SGML_DECL: S++, // <!BLARG + SGML_DECL_QUOTED: S++, // <!BLARG foo "bar + DOCTYPE: S++, // <!DOCTYPE + DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah + DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ... + DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo + COMMENT_STARTING: S++, // <!- + COMMENT: S++, // <!-- + COMMENT_ENDING: S++, // <!-- blah - + COMMENT_ENDED: S++, // <!-- blah -- + CDATA: S++, // <![CDATA[ something + CDATA_ENDING: S++, // ] + CDATA_ENDING_2: S++, // ]] + PROC_INST: S++, // <?hi + PROC_INST_BODY: S++, // <?hi there + PROC_INST_ENDING: S++, // <?hi "there" ? + OPEN_TAG: S++, // <strong + OPEN_TAG_SLASH: S++, // <strong / + ATTRIB: S++, // <a + ATTRIB_NAME: S++, // <a foo + ATTRIB_NAME_SAW_WHITE: S++, // <a foo _ + ATTRIB_VALUE: S++, // <a foo= + ATTRIB_VALUE_QUOTED: S++, // <a foo="bar + ATTRIB_VALUE_CLOSED: S++, // <a foo="bar" + ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar + ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar=""" + ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=" + CLOSE_TAG: S++, // </a + CLOSE_TAG_SAW_WHITE: S++, // </a > + SCRIPT: S++, // <script> ... + SCRIPT_ENDING: S++ // <script> ... < + } + + sax.XML_ENTITIES = { + 'amp': '&', + 'gt': '>', + 'lt': '<', + 'quot': '"', + 'apos': "'" + } + + sax.ENTITIES = { + 'amp': '&', + 'gt': '>', + 'lt': '<', + 'quot': '"', + 'apos': "'", + 'AElig': 198, + 'Aacute': 193, + 'Acirc': 194, + 'Agrave': 192, + 'Aring': 197, + 'Atilde': 195, + 'Auml': 196, + 'Ccedil': 199, + 'ETH': 208, + 'Eacute': 201, + 'Ecirc': 202, + 'Egrave': 200, + 'Euml': 203, + 'Iacute': 205, + 'Icirc': 206, + 'Igrave': 204, + 'Iuml': 207, + 'Ntilde': 209, + 'Oacute': 211, + 'Ocirc': 212, + 'Ograve': 210, + 'Oslash': 216, + 'Otilde': 213, + 'Ouml': 214, + 'THORN': 222, + 'Uacute': 218, + 'Ucirc': 219, + 'Ugrave': 217, + 'Uuml': 220, + 'Yacute': 221, + 'aacute': 225, + 'acirc': 226, + 'aelig': 230, + 'agrave': 224, + 'aring': 229, + 'atilde': 227, + 'auml': 228, + 'ccedil': 231, + 'eacute': 233, + 'ecirc': 234, + 'egrave': 232, + 'eth': 240, + 'euml': 235, + 'iacute': 237, + 'icirc': 238, + 'igrave': 236, + 'iuml': 239, + 'ntilde': 241, + 'oacute': 243, + 'ocirc': 244, + 'ograve': 242, + 'oslash': 248, + 'otilde': 245, + 'ouml': 246, + 'szlig': 223, + 'thorn': 254, + 'uacute': 250, + 'ucirc': 251, + 'ugrave': 249, + 'uuml': 252, + 'yacute': 253, + 'yuml': 255, + 'copy': 169, + 'reg': 174, + 'nbsp': 160, + 'iexcl': 161, + 'cent': 162, + 'pound': 163, + 'curren': 164, + 'yen': 165, + 'brvbar': 166, + 'sect': 167, + 'uml': 168, + 'ordf': 170, + 'laquo': 171, + 'not': 172, + 'shy': 173, + 'macr': 175, + 'deg': 176, + 'plusmn': 177, + 'sup1': 185, + 'sup2': 178, + 'sup3': 179, + 'acute': 180, + 'micro': 181, + 'para': 182, + 'middot': 183, + 'cedil': 184, + 'ordm': 186, + 'raquo': 187, + 'frac14': 188, + 'frac12': 189, + 'frac34': 190, + 'iquest': 191, + 'times': 215, + 'divide': 247, + 'OElig': 338, + 'oelig': 339, + 'Scaron': 352, + 'scaron': 353, + 'Yuml': 376, + 'fnof': 402, + 'circ': 710, + 'tilde': 732, + 'Alpha': 913, + 'Beta': 914, + 'Gamma': 915, + 'Delta': 916, + 'Epsilon': 917, + 'Zeta': 918, + 'Eta': 919, + 'Theta': 920, + 'Iota': 921, + 'Kappa': 922, + 'Lambda': 923, + 'Mu': 924, + 'Nu': 925, + 'Xi': 926, + 'Omicron': 927, + 'Pi': 928, + 'Rho': 929, + 'Sigma': 931, + 'Tau': 932, + 'Upsilon': 933, + 'Phi': 934, + 'Chi': 935, + 'Psi': 936, + 'Omega': 937, + 'alpha': 945, + 'beta': 946, + 'gamma': 947, + 'delta': 948, + 'epsilon': 949, + 'zeta': 950, + 'eta': 951, + 'theta': 952, + 'iota': 953, + 'kappa': 954, + 'lambda': 955, + 'mu': 956, + 'nu': 957, + 'xi': 958, + 'omicron': 959, + 'pi': 960, + 'rho': 961, + 'sigmaf': 962, + 'sigma': 963, + 'tau': 964, + 'upsilon': 965, + 'phi': 966, + 'chi': 967, + 'psi': 968, + 'omega': 969, + 'thetasym': 977, + 'upsih': 978, + 'piv': 982, + 'ensp': 8194, + 'emsp': 8195, + 'thinsp': 8201, + 'zwnj': 8204, + 'zwj': 8205, + 'lrm': 8206, + 'rlm': 8207, + 'ndash': 8211, + 'mdash': 8212, + 'lsquo': 8216, + 'rsquo': 8217, + 'sbquo': 8218, + 'ldquo': 8220, + 'rdquo': 8221, + 'bdquo': 8222, + 'dagger': 8224, + 'Dagger': 8225, + 'bull': 8226, + 'hellip': 8230, + 'permil': 8240, + 'prime': 8242, + 'Prime': 8243, + 'lsaquo': 8249, + 'rsaquo': 8250, + 'oline': 8254, + 'frasl': 8260, + 'euro': 8364, + 'image': 8465, + 'weierp': 8472, + 'real': 8476, + 'trade': 8482, + 'alefsym': 8501, + 'larr': 8592, + 'uarr': 8593, + 'rarr': 8594, + 'darr': 8595, + 'harr': 8596, + 'crarr': 8629, + 'lArr': 8656, + 'uArr': 8657, + 'rArr': 8658, + 'dArr': 8659, + 'hArr': 8660, + 'forall': 8704, + 'part': 8706, + 'exist': 8707, + 'empty': 8709, + 'nabla': 8711, + 'isin': 8712, + 'notin': 8713, + 'ni': 8715, + 'prod': 8719, + 'sum': 8721, + 'minus': 8722, + 'lowast': 8727, + 'radic': 8730, + 'prop': 8733, + 'infin': 8734, + 'ang': 8736, + 'and': 8743, + 'or': 8744, + 'cap': 8745, + 'cup': 8746, + 'int': 8747, + 'there4': 8756, + 'sim': 8764, + 'cong': 8773, + 'asymp': 8776, + 'ne': 8800, + 'equiv': 8801, + 'le': 8804, + 'ge': 8805, + 'sub': 8834, + 'sup': 8835, + 'nsub': 8836, + 'sube': 8838, + 'supe': 8839, + 'oplus': 8853, + 'otimes': 8855, + 'perp': 8869, + 'sdot': 8901, + 'lceil': 8968, + 'rceil': 8969, + 'lfloor': 8970, + 'rfloor': 8971, + 'lang': 9001, + 'rang': 9002, + 'loz': 9674, + 'spades': 9824, + 'clubs': 9827, + 'hearts': 9829, + 'diams': 9830 + } + + Object.keys(sax.ENTITIES).forEach(function (key) { var e = sax.ENTITIES[key] var s = typeof e === 'number' ? String.fromCharCode(e) : e sax.ENTITIES[key] = s -}) - -for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S - -// shorthand -S = sax.STATE - -function emit (parser, event, data) { - parser[event] && parser[event](data) -} - -function emitNode (parser, nodeType, data) { - if (parser.textNode) closeText(parser) - emit(parser, nodeType, data) -} - -function closeText (parser) { - parser.textNode = textopts(parser.opt, parser.textNode) - if (parser.textNode) emit(parser, "ontext", parser.textNode) - parser.textNode = "" -} - -function textopts (opt, text) { - if (opt.trim) text = text.trim() - if (opt.normalize) text = text.replace(/\s+/g, " ") - return text -} - -function error (parser, er) { - closeText(parser) - if (parser.trackPosition) { - er += "\nLine: "+parser.line+ - "\nColumn: "+parser.column+ - "\nChar: "+parser.c + }) + + for (var s in sax.STATE) { + sax.STATE[sax.STATE[s]] = s } - er = new Error(er) - parser.error = er - emit(parser, "onerror", er) - return parser -} - -function end (parser) { - if (!parser.closedRoot) strictFail(parser, "Unclosed root tag") - if ((parser.state !== S.BEGIN) && (parser.state !== S.TEXT)) error(parser, "Unexpected end") - closeText(parser) - parser.c = "" - parser.closed = true - emit(parser, "onend") - SAXParser.call(parser, parser.strict, parser.opt) - return parser -} - -function strictFail (parser, message) { - if (typeof parser !== 'object' || !(parser instanceof SAXParser)) - throw new Error('bad call to strictFail'); - if (parser.strict) error(parser, message) -} - -function newTag (parser) { - if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]() - var parent = parser.tags[parser.tags.length - 1] || parser - , tag = parser.tag = { name : parser.tagName, attributes : {} } - - // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar" - if (parser.opt.xmlns) tag.ns = parent.ns - parser.attribList.length = 0 -} - -function qname (name, attribute) { - var i = name.indexOf(":") - , qualName = i < 0 ? [ "", name ] : name.split(":") - , prefix = qualName[0] - , local = qualName[1] - - // <x "xmlns"="http://foo"> - if (attribute && name === "xmlns") { - prefix = "xmlns" - local = "" + + // shorthand + S = sax.STATE + + function emit (parser, event, data) { + parser[event] && parser[event](data) } - return { prefix: prefix, local: local } -} + function emitNode (parser, nodeType, data) { + if (parser.textNode) closeText(parser) + emit(parser, nodeType, data) + } -function attrib (parser) { - if (!parser.strict) parser.attribName = parser.attribName[parser.looseCase]() + function closeText (parser) { + parser.textNode = textopts(parser.opt, parser.textNode) + if (parser.textNode) emit(parser, 'ontext', parser.textNode) + parser.textNode = '' + } - if (parser.attribList.indexOf(parser.attribName) !== -1 || - parser.tag.attributes.hasOwnProperty(parser.attribName)) { - return parser.attribName = parser.attribValue = "" + function textopts (opt, text) { + if (opt.trim) text = text.trim() + if (opt.normalize) text = text.replace(/\s+/g, ' ') + return text } - if (parser.opt.xmlns) { - var qn = qname(parser.attribName, true) - , prefix = qn.prefix - , local = qn.local - - if (prefix === "xmlns") { - // namespace binding attribute; push the binding into scope - if (local === "xml" && parser.attribValue !== XML_NAMESPACE) { - strictFail( parser - , "xml: prefix must be bound to " + XML_NAMESPACE + "\n" - + "Actual: " + parser.attribValue ) - } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) { - strictFail( parser - , "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n" - + "Actual: " + parser.attribValue ) - } else { - var tag = parser.tag - , parent = parser.tags[parser.tags.length - 1] || parser - if (tag.ns === parent.ns) { - tag.ns = Object.create(parent.ns) - } - tag.ns[local] = parser.attribValue - } + function error (parser, er) { + closeText(parser) + if (parser.trackPosition) { + er += '\nLine: ' + parser.line + + '\nColumn: ' + parser.column + + '\nChar: ' + parser.c } + er = new Error(er) + parser.error = er + emit(parser, 'onerror', er) + return parser + } - // defer onattribute events until all attributes have been seen - // so any new bindings can take effect; preserve attribute order - // so deferred events can be emitted in document order - parser.attribList.push([parser.attribName, parser.attribValue]) - } else { - // in non-xmlns mode, we can emit the event right away - parser.tag.attributes[parser.attribName] = parser.attribValue - emitNode( parser - , "onattribute" - , { name: parser.attribName - , value: parser.attribValue } ) + function end (parser) { + if (parser.sawRoot && !parser.closedRoot) strictFail(parser, 'Unclosed root tag') + if ((parser.state !== S.BEGIN) && + (parser.state !== S.BEGIN_WHITESPACE) && + (parser.state !== S.TEXT)) { + error(parser, 'Unexpected end') + } + closeText(parser) + parser.c = '' + parser.closed = true + emit(parser, 'onend') + SAXParser.call(parser, parser.strict, parser.opt) + return parser } - parser.attribName = parser.attribValue = "" -} + function strictFail (parser, message) { + if (typeof parser !== 'object' || !(parser instanceof SAXParser)) { + throw new Error('bad call to strictFail') + } + if (parser.strict) { + error(parser, message) + } + } -function openTag (parser, selfClosing) { - if (parser.opt.xmlns) { - // emit namespace binding events - var tag = parser.tag + function newTag (parser) { + if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]() + var parent = parser.tags[parser.tags.length - 1] || parser + var tag = parser.tag = { name: parser.tagName, attributes: {} } + + // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar" + if (parser.opt.xmlns) { + tag.ns = parent.ns + } + parser.attribList.length = 0 + emitNode(parser, 'onopentagstart', tag) + } - // add namespace info to tag - var qn = qname(parser.tagName) - tag.prefix = qn.prefix - tag.local = qn.local - tag.uri = tag.ns[qn.prefix] || "" + function qname (name, attribute) { + var i = name.indexOf(':') + var qualName = i < 0 ? [ '', name ] : name.split(':') + var prefix = qualName[0] + var local = qualName[1] - if (tag.prefix && !tag.uri) { - strictFail(parser, "Unbound namespace prefix: " - + JSON.stringify(parser.tagName)) - tag.uri = qn.prefix + // <x "xmlns"="http://foo"> + if (attribute && name === 'xmlns') { + prefix = 'xmlns' + local = '' } - var parent = parser.tags[parser.tags.length - 1] || parser - if (tag.ns && parent.ns !== tag.ns) { - Object.keys(tag.ns).forEach(function (p) { - emitNode( parser - , "onopennamespace" - , { prefix: p , uri: tag.ns[p] } ) - }) + return { prefix: prefix, local: local } + } + + function attrib (parser) { + if (!parser.strict) { + parser.attribName = parser.attribName[parser.looseCase]() } - // handle deferred onattribute events - // Note: do not apply default ns to attributes: - // http://www.w3.org/TR/REC-xml-names/#defaulting - for (var i = 0, l = parser.attribList.length; i < l; i ++) { - var nv = parser.attribList[i] - var name = nv[0] - , value = nv[1] - , qualName = qname(name, true) - , prefix = qualName.prefix - , local = qualName.local - , uri = prefix == "" ? "" : (tag.ns[prefix] || "") - , a = { name: name - , value: value - , prefix: prefix - , local: local - , uri: uri - } + if (parser.attribList.indexOf(parser.attribName) !== -1 || + parser.tag.attributes.hasOwnProperty(parser.attribName)) { + parser.attribName = parser.attribValue = '' + return + } - // if there's any attributes with an undefined namespace, - // then fail on them now. - if (prefix && prefix != "xmlns" && !uri) { - strictFail(parser, "Unbound namespace prefix: " - + JSON.stringify(prefix)) - a.uri = prefix + if (parser.opt.xmlns) { + var qn = qname(parser.attribName, true) + var prefix = qn.prefix + var local = qn.local + + if (prefix === 'xmlns') { + // namespace binding attribute. push the binding into scope + if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) { + strictFail(parser, + 'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' + + 'Actual: ' + parser.attribValue) + } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) { + strictFail(parser, + 'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' + + 'Actual: ' + parser.attribValue) + } else { + var tag = parser.tag + var parent = parser.tags[parser.tags.length - 1] || parser + if (tag.ns === parent.ns) { + tag.ns = Object.create(parent.ns) + } + tag.ns[local] = parser.attribValue + } } - parser.tag.attributes[name] = a - emitNode(parser, "onattribute", a) + + // defer onattribute events until all attributes have been seen + // so any new bindings can take effect. preserve attribute order + // so deferred events can be emitted in document order + parser.attribList.push([parser.attribName, parser.attribValue]) + } else { + // in non-xmlns mode, we can emit the event right away + parser.tag.attributes[parser.attribName] = parser.attribValue + emitNode(parser, 'onattribute', { + name: parser.attribName, + value: parser.attribValue + }) } - parser.attribList.length = 0 + + parser.attribName = parser.attribValue = '' } - parser.tag.isSelfClosing = !!selfClosing + function openTag (parser, selfClosing) { + if (parser.opt.xmlns) { + // emit namespace binding events + var tag = parser.tag + + // add namespace info to tag + var qn = qname(parser.tagName) + tag.prefix = qn.prefix + tag.local = qn.local + tag.uri = tag.ns[qn.prefix] || '' + + if (tag.prefix && !tag.uri) { + strictFail(parser, 'Unbound namespace prefix: ' + + JSON.stringify(parser.tagName)) + tag.uri = qn.prefix + } - // process the tag - parser.sawRoot = true - parser.tags.push(parser.tag) - emitNode(parser, "onopentag", parser.tag) - if (!selfClosing) { - // special case for <script> in non-strict mode. - if (!parser.noscript && parser.tagName.toLowerCase() === "script") { - parser.state = S.SCRIPT - } else { - parser.state = S.TEXT + var parent = parser.tags[parser.tags.length - 1] || parser + if (tag.ns && parent.ns !== tag.ns) { + Object.keys(tag.ns).forEach(function (p) { + emitNode(parser, 'onopennamespace', { + prefix: p, + uri: tag.ns[p] + }) + }) + } + + // handle deferred onattribute events + // Note: do not apply default ns to attributes: + // http://www.w3.org/TR/REC-xml-names/#defaulting + for (var i = 0, l = parser.attribList.length; i < l; i++) { + var nv = parser.attribList[i] + var name = nv[0] + var value = nv[1] + var qualName = qname(name, true) + var prefix = qualName.prefix + var local = qualName.local + var uri = prefix === '' ? '' : (tag.ns[prefix] || '') + var a = { + name: name, + value: value, + prefix: prefix, + local: local, + uri: uri + } + + // if there's any attributes with an undefined namespace, + // then fail on them now. + if (prefix && prefix !== 'xmlns' && !uri) { + strictFail(parser, 'Unbound namespace prefix: ' + + JSON.stringify(prefix)) + a.uri = prefix + } + parser.tag.attributes[name] = a + emitNode(parser, 'onattribute', a) + } + parser.attribList.length = 0 } - parser.tag = null - parser.tagName = "" - } - parser.attribName = parser.attribValue = "" - parser.attribList.length = 0 -} - -function closeTag (parser) { - if (!parser.tagName) { - strictFail(parser, "Weird empty close tag.") - parser.textNode += "</>" - parser.state = S.TEXT - return + + parser.tag.isSelfClosing = !!selfClosing + + // process the tag + parser.sawRoot = true + parser.tags.push(parser.tag) + emitNode(parser, 'onopentag', parser.tag) + if (!selfClosing) { + // special case for <script> in non-strict mode. + if (!parser.noscript && parser.tagName.toLowerCase() === 'script') { + parser.state = S.SCRIPT + } else { + parser.state = S.TEXT + } + parser.tag = null + parser.tagName = '' + } + parser.attribName = parser.attribValue = '' + parser.attribList.length = 0 } - if (parser.script) { - if (parser.tagName !== "script") { - parser.script += "</" + parser.tagName + ">" - parser.tagName = "" - parser.state = S.SCRIPT + function closeTag (parser) { + if (!parser.tagName) { + strictFail(parser, 'Weird empty close tag.') + parser.textNode += '</>' + parser.state = S.TEXT return } - emitNode(parser, "onscript", parser.script) - parser.script = "" - } - // first make sure that the closing tag actually exists. - // <a><b></c></b></a> will close everything, otherwise. - var t = parser.tags.length - var tagName = parser.tagName - if (!parser.strict) tagName = tagName[parser.looseCase]() - var closeTo = tagName - while (t --) { - var close = parser.tags[t] - if (close.name !== closeTo) { - // fail the first time in strict mode - strictFail(parser, "Unexpected close tag") - } else break - } + if (parser.script) { + if (parser.tagName !== 'script') { + parser.script += '</' + parser.tagName + '>' + parser.tagName = '' + parser.state = S.SCRIPT + return + } + emitNode(parser, 'onscript', parser.script) + parser.script = '' + } + + // first make sure that the closing tag actually exists. + // <a><b></c></b></a> will close everything, otherwise. + var t = parser.tags.length + var tagName = parser.tagName + if (!parser.strict) { + tagName = tagName[parser.looseCase]() + } + var closeTo = tagName + while (t--) { + var close = parser.tags[t] + if (close.name !== closeTo) { + // fail the first time in strict mode + strictFail(parser, 'Unexpected close tag') + } else { + break + } + } - // didn't find it. we already failed for strict, so just abort. - if (t < 0) { - strictFail(parser, "Unmatched closing tag: "+parser.tagName) - parser.textNode += "</" + parser.tagName + ">" + // didn't find it. we already failed for strict, so just abort. + if (t < 0) { + strictFail(parser, 'Unmatched closing tag: ' + parser.tagName) + parser.textNode += '</' + parser.tagName + '>' + parser.state = S.TEXT + return + } + parser.tagName = tagName + var s = parser.tags.length + while (s-- > t) { + var tag = parser.tag = parser.tags.pop() + parser.tagName = parser.tag.name + emitNode(parser, 'onclosetag', parser.tagName) + + var x = {} + for (var i in tag.ns) { + x[i] = tag.ns[i] + } + + var parent = parser.tags[parser.tags.length - 1] || parser + if (parser.opt.xmlns && tag.ns !== parent.ns) { + // remove namespace bindings introduced by tag + Object.keys(tag.ns).forEach(function (p) { + var n = tag.ns[p] + emitNode(parser, 'onclosenamespace', { prefix: p, uri: n }) + }) + } + } + if (t === 0) parser.closedRoot = true + parser.tagName = parser.attribValue = parser.attribName = '' + parser.attribList.length = 0 parser.state = S.TEXT - return } - parser.tagName = tagName - var s = parser.tags.length - while (s --> t) { - var tag = parser.tag = parser.tags.pop() - parser.tagName = parser.tag.name - emitNode(parser, "onclosetag", parser.tagName) - var x = {} - for (var i in tag.ns) x[i] = tag.ns[i] + function parseEntity (parser) { + var entity = parser.entity + var entityLC = entity.toLowerCase() + var num + var numStr = '' - var parent = parser.tags[parser.tags.length - 1] || parser - if (parser.opt.xmlns && tag.ns !== parent.ns) { - // remove namespace bindings introduced by tag - Object.keys(tag.ns).forEach(function (p) { - var n = tag.ns[p] - emitNode(parser, "onclosenamespace", { prefix: p, uri: n }) - }) + if (parser.ENTITIES[entity]) { + return parser.ENTITIES[entity] + } + if (parser.ENTITIES[entityLC]) { + return parser.ENTITIES[entityLC] + } + entity = entityLC + if (entity.charAt(0) === '#') { + if (entity.charAt(1) === 'x') { + entity = entity.slice(2) + num = parseInt(entity, 16) + numStr = num.toString(16) + } else { + entity = entity.slice(1) + num = parseInt(entity, 10) + numStr = num.toString(10) + } } + entity = entity.replace(/^0+/, '') + if (numStr.toLowerCase() !== entity) { + strictFail(parser, 'Invalid character entity') + return '&' + parser.entity + ';' + } + + return String.fromCodePoint(num) } - if (t === 0) parser.closedRoot = true - parser.tagName = parser.attribValue = parser.attribName = "" - parser.attribList.length = 0 - parser.state = S.TEXT -} - -function parseEntity (parser) { - var entity = parser.entity - , entityLC = entity.toLowerCase() - , num - , numStr = "" - if (parser.ENTITIES[entity]) - return parser.ENTITIES[entity] - if (parser.ENTITIES[entityLC]) - return parser.ENTITIES[entityLC] - entity = entityLC - if (entity.charAt(0) === "#") { - if (entity.charAt(1) === "x") { - entity = entity.slice(2) - num = parseInt(entity, 16) - numStr = num.toString(16) - } else { - entity = entity.slice(1) - num = parseInt(entity, 10) - numStr = num.toString(10) + + function beginWhiteSpace (parser, c) { + if (c === '<') { + parser.state = S.OPEN_WAKA + parser.startTagPosition = parser.position + } else if (not(whitespace, c)) { + // have to process this as a text node. + // weird, but happens. + strictFail(parser, 'Non-whitespace before first tag.') + parser.textNode = c + parser.state = S.TEXT } } - entity = entity.replace(/^0+/, "") - if (numStr.toLowerCase() !== entity) { - strictFail(parser, "Invalid character entity") - return "&"+parser.entity + ";" + + function charAt (chunk, i) { + var result = '' + if (i < chunk.length) { + result = chunk.charAt(i) + } + return result } - return String.fromCodePoint(num) -} - -function write (chunk) { - var parser = this - if (this.error) throw this.error - if (parser.closed) return error(parser, - "Cannot write after close. Assign an onready handler.") - if (chunk === null) return end(parser) - var i = 0, c = "" - while (parser.c = c = chunk.charAt(i++)) { - if (parser.trackPosition) { - parser.position ++ - if (c === "\n") { - parser.line ++ - parser.column = 0 - } else parser.column ++ + function write (chunk) { + var parser = this + if (this.error) { + throw this.error + } + if (parser.closed) { + return error(parser, + 'Cannot write after close. Assign an onready handler.') + } + if (chunk === null) { + return end(parser) + } + if (typeof chunk === 'object') { + chunk = chunk.toString() } - switch (parser.state) { - - case S.BEGIN: - if (c === "<") { - parser.state = S.OPEN_WAKA - parser.startTagPosition = parser.position - } else if (not(whitespace,c)) { - // have to process this as a text node. - // weird, but happens. - strictFail(parser, "Non-whitespace before first tag.") - parser.textNode = c - parser.state = S.TEXT + var i = 0 + var c = '' + while (true) { + c = charAt(chunk, i++) + parser.c = c + if (!c) { + break + } + if (parser.trackPosition) { + parser.position++ + if (c === '\n') { + parser.line++ + parser.column = 0 + } else { + parser.column++ } - continue - - case S.TEXT: - if (parser.sawRoot && !parser.closedRoot) { - var starti = i-1 - while (c && c!=="<" && c!=="&") { - c = chunk.charAt(i++) - if (c && parser.trackPosition) { - parser.position ++ - if (c === "\n") { - parser.line ++ - parser.column = 0 - } else parser.column ++ + } + switch (parser.state) { + case S.BEGIN: + parser.state = S.BEGIN_WHITESPACE + if (c === '\uFEFF') { + continue + } + beginWhiteSpace(parser, c) + continue + + case S.BEGIN_WHITESPACE: + beginWhiteSpace(parser, c) + continue + + case S.TEXT: + if (parser.sawRoot && !parser.closedRoot) { + var starti = i - 1 + while (c && c !== '<' && c !== '&') { + c = charAt(chunk, i++) + if (c && parser.trackPosition) { + parser.position++ + if (c === '\n') { + parser.line++ + parser.column = 0 + } else { + parser.column++ + } + } } + parser.textNode += chunk.substring(starti, i - 1) } - parser.textNode += chunk.substring(starti, i-1) - } - if (c === "<") { - parser.state = S.OPEN_WAKA - parser.startTagPosition = parser.position - } else { - if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) - strictFail(parser, "Text data outside of root node.") - if (c === "&") parser.state = S.TEXT_ENTITY - else parser.textNode += c - } - continue - - case S.SCRIPT: - // only non-strict - if (c === "<") { - parser.state = S.SCRIPT_ENDING - } else parser.script += c - continue - - case S.SCRIPT_ENDING: - if (c === "/") { - parser.state = S.CLOSE_TAG - } else { - parser.script += "<" + c - parser.state = S.SCRIPT - } - continue - - case S.OPEN_WAKA: - // either a /, ?, !, or text is coming next. - if (c === "!") { - parser.state = S.SGML_DECL - parser.sgmlDecl = "" - } else if (is(whitespace, c)) { - // wait for it... - } else if (is(nameStart,c)) { - parser.state = S.OPEN_TAG - parser.tagName = c - } else if (c === "/") { - parser.state = S.CLOSE_TAG - parser.tagName = "" - } else if (c === "?") { - parser.state = S.PROC_INST - parser.procInstName = parser.procInstBody = "" - } else { - strictFail(parser, "Unencoded <") - // if there was some whitespace, then add that in. - if (parser.startTagPosition + 1 < parser.position) { - var pad = parser.position - parser.startTagPosition - c = new Array(pad).join(" ") + c + if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) { + parser.state = S.OPEN_WAKA + parser.startTagPosition = parser.position + } else { + if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) { + strictFail(parser, 'Text data outside of root node.') + } + if (c === '&') { + parser.state = S.TEXT_ENTITY + } else { + parser.textNode += c + } + } + continue + + case S.SCRIPT: + // only non-strict + if (c === '<') { + parser.state = S.SCRIPT_ENDING + } else { + parser.script += c + } + continue + + case S.SCRIPT_ENDING: + if (c === '/') { + parser.state = S.CLOSE_TAG + } else { + parser.script += '<' + c + parser.state = S.SCRIPT + } + continue + + case S.OPEN_WAKA: + // either a /, ?, !, or text is coming next. + if (c === '!') { + parser.state = S.SGML_DECL + parser.sgmlDecl = '' + } else if (is(whitespace, c)) { + // wait for it... + } else if (is(nameStart, c)) { + parser.state = S.OPEN_TAG + parser.tagName = c + } else if (c === '/') { + parser.state = S.CLOSE_TAG + parser.tagName = '' + } else if (c === '?') { + parser.state = S.PROC_INST + parser.procInstName = parser.procInstBody = '' + } else { + strictFail(parser, 'Unencoded <') + // if there was some whitespace, then add that in. + if (parser.startTagPosition + 1 < parser.position) { + var pad = parser.position - parser.startTagPosition + c = new Array(pad).join(' ') + c + } + parser.textNode += '<' + c + parser.state = S.TEXT + } + continue + + case S.SGML_DECL: + if ((parser.sgmlDecl + c).toUpperCase() === CDATA) { + emitNode(parser, 'onopencdata') + parser.state = S.CDATA + parser.sgmlDecl = '' + parser.cdata = '' + } else if (parser.sgmlDecl + c === '--') { + parser.state = S.COMMENT + parser.comment = '' + parser.sgmlDecl = '' + } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) { + parser.state = S.DOCTYPE + if (parser.doctype || parser.sawRoot) { + strictFail(parser, + 'Inappropriately located doctype declaration') + } + parser.doctype = '' + parser.sgmlDecl = '' + } else if (c === '>') { + emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl) + parser.sgmlDecl = '' + parser.state = S.TEXT + } else if (is(quote, c)) { + parser.state = S.SGML_DECL_QUOTED + parser.sgmlDecl += c + } else { + parser.sgmlDecl += c + } + continue + + case S.SGML_DECL_QUOTED: + if (c === parser.q) { + parser.state = S.SGML_DECL + parser.q = '' } - parser.textNode += "<" + c - parser.state = S.TEXT - } - continue - - case S.SGML_DECL: - if ((parser.sgmlDecl+c).toUpperCase() === CDATA) { - emitNode(parser, "onopencdata") - parser.state = S.CDATA - parser.sgmlDecl = "" - parser.cdata = "" - } else if (parser.sgmlDecl+c === "--") { - parser.state = S.COMMENT - parser.comment = "" - parser.sgmlDecl = "" - } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) { - parser.state = S.DOCTYPE - if (parser.doctype || parser.sawRoot) strictFail(parser, - "Inappropriately located doctype declaration") - parser.doctype = "" - parser.sgmlDecl = "" - } else if (c === ">") { - emitNode(parser, "onsgmldeclaration", parser.sgmlDecl) - parser.sgmlDecl = "" - parser.state = S.TEXT - } else if (is(quote, c)) { - parser.state = S.SGML_DECL_QUOTED parser.sgmlDecl += c - } else parser.sgmlDecl += c - continue + continue - case S.SGML_DECL_QUOTED: - if (c === parser.q) { - parser.state = S.SGML_DECL - parser.q = "" - } - parser.sgmlDecl += c - continue - - case S.DOCTYPE: - if (c === ">") { - parser.state = S.TEXT - emitNode(parser, "ondoctype", parser.doctype) - parser.doctype = true // just remember that we saw it. - } else { + case S.DOCTYPE: + if (c === '>') { + parser.state = S.TEXT + emitNode(parser, 'ondoctype', parser.doctype) + parser.doctype = true // just remember that we saw it. + } else { + parser.doctype += c + if (c === '[') { + parser.state = S.DOCTYPE_DTD + } else if (is(quote, c)) { + parser.state = S.DOCTYPE_QUOTED + parser.q = c + } + } + continue + + case S.DOCTYPE_QUOTED: parser.doctype += c - if (c === "[") parser.state = S.DOCTYPE_DTD - else if (is(quote, c)) { - parser.state = S.DOCTYPE_QUOTED + if (c === parser.q) { + parser.q = '' + parser.state = S.DOCTYPE + } + continue + + case S.DOCTYPE_DTD: + parser.doctype += c + if (c === ']') { + parser.state = S.DOCTYPE + } else if (is(quote, c)) { + parser.state = S.DOCTYPE_DTD_QUOTED parser.q = c } - } - continue + continue - case S.DOCTYPE_QUOTED: - parser.doctype += c - if (c === parser.q) { - parser.q = "" - parser.state = S.DOCTYPE - } - continue - - case S.DOCTYPE_DTD: - parser.doctype += c - if (c === "]") parser.state = S.DOCTYPE - else if (is(quote,c)) { - parser.state = S.DOCTYPE_DTD_QUOTED - parser.q = c - } - continue + case S.DOCTYPE_DTD_QUOTED: + parser.doctype += c + if (c === parser.q) { + parser.state = S.DOCTYPE_DTD + parser.q = '' + } + continue - case S.DOCTYPE_DTD_QUOTED: - parser.doctype += c - if (c === parser.q) { - parser.state = S.DOCTYPE_DTD - parser.q = "" - } - continue - - case S.COMMENT: - if (c === "-") parser.state = S.COMMENT_ENDING - else parser.comment += c - continue - - case S.COMMENT_ENDING: - if (c === "-") { - parser.state = S.COMMENT_ENDED - parser.comment = textopts(parser.opt, parser.comment) - if (parser.comment) emitNode(parser, "oncomment", parser.comment) - parser.comment = "" - } else { - parser.comment += "-" + c - parser.state = S.COMMENT - } - continue - - case S.COMMENT_ENDED: - if (c !== ">") { - strictFail(parser, "Malformed comment") - // allow <!-- blah -- bloo --> in non-strict mode, - // which is a comment of " blah -- bloo " - parser.comment += "--" + c - parser.state = S.COMMENT - } else parser.state = S.TEXT - continue - - case S.CDATA: - if (c === "]") parser.state = S.CDATA_ENDING - else parser.cdata += c - continue - - case S.CDATA_ENDING: - if (c === "]") parser.state = S.CDATA_ENDING_2 - else { - parser.cdata += "]" + c - parser.state = S.CDATA - } - continue - - case S.CDATA_ENDING_2: - if (c === ">") { - if (parser.cdata) emitNode(parser, "oncdata", parser.cdata) - emitNode(parser, "onclosecdata") - parser.cdata = "" - parser.state = S.TEXT - } else if (c === "]") { - parser.cdata += "]" - } else { - parser.cdata += "]]" + c - parser.state = S.CDATA - } - continue - - case S.PROC_INST: - if (c === "?") parser.state = S.PROC_INST_ENDING - else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY - else parser.procInstName += c - continue - - case S.PROC_INST_BODY: - if (!parser.procInstBody && is(whitespace, c)) continue - else if (c === "?") parser.state = S.PROC_INST_ENDING - else parser.procInstBody += c - continue - - case S.PROC_INST_ENDING: - if (c === ">") { - emitNode(parser, "onprocessinginstruction", { - name : parser.procInstName, - body : parser.procInstBody - }) - parser.procInstName = parser.procInstBody = "" - parser.state = S.TEXT - } else { - parser.procInstBody += "?" + c - parser.state = S.PROC_INST_BODY - } - continue - - case S.OPEN_TAG: - if (is(nameBody, c)) parser.tagName += c - else { - newTag(parser) - if (c === ">") openTag(parser) - else if (c === "/") parser.state = S.OPEN_TAG_SLASH - else { - if (not(whitespace, c)) strictFail( - parser, "Invalid character in tag name") + case S.COMMENT: + if (c === '-') { + parser.state = S.COMMENT_ENDING + } else { + parser.comment += c + } + continue + + case S.COMMENT_ENDING: + if (c === '-') { + parser.state = S.COMMENT_ENDED + parser.comment = textopts(parser.opt, parser.comment) + if (parser.comment) { + emitNode(parser, 'oncomment', parser.comment) + } + parser.comment = '' + } else { + parser.comment += '-' + c + parser.state = S.COMMENT + } + continue + + case S.COMMENT_ENDED: + if (c !== '>') { + strictFail(parser, 'Malformed comment') + // allow <!-- blah -- bloo --> in non-strict mode, + // which is a comment of " blah -- bloo " + parser.comment += '--' + c + parser.state = S.COMMENT + } else { + parser.state = S.TEXT + } + continue + + case S.CDATA: + if (c === ']') { + parser.state = S.CDATA_ENDING + } else { + parser.cdata += c + } + continue + + case S.CDATA_ENDING: + if (c === ']') { + parser.state = S.CDATA_ENDING_2 + } else { + parser.cdata += ']' + c + parser.state = S.CDATA + } + continue + + case S.CDATA_ENDING_2: + if (c === '>') { + if (parser.cdata) { + emitNode(parser, 'oncdata', parser.cdata) + } + emitNode(parser, 'onclosecdata') + parser.cdata = '' + parser.state = S.TEXT + } else if (c === ']') { + parser.cdata += ']' + } else { + parser.cdata += ']]' + c + parser.state = S.CDATA + } + continue + + case S.PROC_INST: + if (c === '?') { + parser.state = S.PROC_INST_ENDING + } else if (is(whitespace, c)) { + parser.state = S.PROC_INST_BODY + } else { + parser.procInstName += c + } + continue + + case S.PROC_INST_BODY: + if (!parser.procInstBody && is(whitespace, c)) { + continue + } else if (c === '?') { + parser.state = S.PROC_INST_ENDING + } else { + parser.procInstBody += c + } + continue + + case S.PROC_INST_ENDING: + if (c === '>') { + emitNode(parser, 'onprocessinginstruction', { + name: parser.procInstName, + body: parser.procInstBody + }) + parser.procInstName = parser.procInstBody = '' + parser.state = S.TEXT + } else { + parser.procInstBody += '?' + c + parser.state = S.PROC_INST_BODY + } + continue + + case S.OPEN_TAG: + if (is(nameBody, c)) { + parser.tagName += c + } else { + newTag(parser) + if (c === '>') { + openTag(parser) + } else if (c === '/') { + parser.state = S.OPEN_TAG_SLASH + } else { + if (not(whitespace, c)) { + strictFail(parser, 'Invalid character in tag name') + } + parser.state = S.ATTRIB + } + } + continue + + case S.OPEN_TAG_SLASH: + if (c === '>') { + openTag(parser, true) + closeTag(parser) + } else { + strictFail(parser, 'Forward-slash in opening tag not followed by >') parser.state = S.ATTRIB } - } - continue + continue - case S.OPEN_TAG_SLASH: - if (c === ">") { - openTag(parser, true) - closeTag(parser) - } else { - strictFail(parser, "Forward-slash in opening tag not followed by >") - parser.state = S.ATTRIB - } - continue - - case S.ATTRIB: - // haven't read the attribute name yet. - if (is(whitespace, c)) continue - else if (c === ">") openTag(parser) - else if (c === "/") parser.state = S.OPEN_TAG_SLASH - else if (is(nameStart, c)) { - parser.attribName = c - parser.attribValue = "" - parser.state = S.ATTRIB_NAME - } else strictFail(parser, "Invalid attribute name") - continue - - case S.ATTRIB_NAME: - if (c === "=") parser.state = S.ATTRIB_VALUE - else if (c === ">") { - strictFail(parser, "Attribute without value") - parser.attribValue = parser.attribName - attrib(parser) - openTag(parser) - } - else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE - else if (is(nameBody, c)) parser.attribName += c - else strictFail(parser, "Invalid attribute name") - continue - - case S.ATTRIB_NAME_SAW_WHITE: - if (c === "=") parser.state = S.ATTRIB_VALUE - else if (is(whitespace, c)) continue - else { - strictFail(parser, "Attribute without value") - parser.tag.attributes[parser.attribName] = "" - parser.attribValue = "" - emitNode(parser, "onattribute", - { name : parser.attribName, value : "" }) - parser.attribName = "" - if (c === ">") openTag(parser) - else if (is(nameStart, c)) { + case S.ATTRIB: + // haven't read the attribute name yet. + if (is(whitespace, c)) { + continue + } else if (c === '>') { + openTag(parser) + } else if (c === '/') { + parser.state = S.OPEN_TAG_SLASH + } else if (is(nameStart, c)) { parser.attribName = c + parser.attribValue = '' parser.state = S.ATTRIB_NAME } else { - strictFail(parser, "Invalid attribute name") - parser.state = S.ATTRIB + strictFail(parser, 'Invalid attribute name') } - } - continue + continue - case S.ATTRIB_VALUE: - if (is(whitespace, c)) continue - else if (is(quote, c)) { - parser.q = c - parser.state = S.ATTRIB_VALUE_QUOTED - } else { - strictFail(parser, "Unquoted attribute value") - parser.state = S.ATTRIB_VALUE_UNQUOTED - parser.attribValue = c - } - continue + case S.ATTRIB_NAME: + if (c === '=') { + parser.state = S.ATTRIB_VALUE + } else if (c === '>') { + strictFail(parser, 'Attribute without value') + parser.attribValue = parser.attribName + attrib(parser) + openTag(parser) + } else if (is(whitespace, c)) { + parser.state = S.ATTRIB_NAME_SAW_WHITE + } else if (is(nameBody, c)) { + parser.attribName += c + } else { + strictFail(parser, 'Invalid attribute name') + } + continue - case S.ATTRIB_VALUE_QUOTED: - if (c !== parser.q) { - if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q - else parser.attribValue += c + case S.ATTRIB_NAME_SAW_WHITE: + if (c === '=') { + parser.state = S.ATTRIB_VALUE + } else if (is(whitespace, c)) { + continue + } else { + strictFail(parser, 'Attribute without value') + parser.tag.attributes[parser.attribName] = '' + parser.attribValue = '' + emitNode(parser, 'onattribute', { + name: parser.attribName, + value: '' + }) + parser.attribName = '' + if (c === '>') { + openTag(parser) + } else if (is(nameStart, c)) { + parser.attribName = c + parser.state = S.ATTRIB_NAME + } else { + strictFail(parser, 'Invalid attribute name') + parser.state = S.ATTRIB + } + } continue - } - attrib(parser) - parser.q = "" - parser.state = S.ATTRIB_VALUE_CLOSED - continue - - case S.ATTRIB_VALUE_CLOSED: - if (is(whitespace, c)) { - parser.state = S.ATTRIB - } else if (c === ">") openTag(parser) - else if (c === "/") parser.state = S.OPEN_TAG_SLASH - else if (is(nameStart, c)) { - strictFail(parser, "No whitespace between attributes") - parser.attribName = c - parser.attribValue = "" - parser.state = S.ATTRIB_NAME - } else strictFail(parser, "Invalid attribute name") - continue - - case S.ATTRIB_VALUE_UNQUOTED: - if (not(attribEnd,c)) { - if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U - else parser.attribValue += c + + case S.ATTRIB_VALUE: + if (is(whitespace, c)) { + continue + } else if (is(quote, c)) { + parser.q = c + parser.state = S.ATTRIB_VALUE_QUOTED + } else { + strictFail(parser, 'Unquoted attribute value') + parser.state = S.ATTRIB_VALUE_UNQUOTED + parser.attribValue = c + } continue - } - attrib(parser) - if (c === ">") openTag(parser) - else parser.state = S.ATTRIB - continue - - case S.CLOSE_TAG: - if (!parser.tagName) { - if (is(whitespace, c)) continue - else if (not(nameStart, c)) { - if (parser.script) { - parser.script += "</" + c - parser.state = S.SCRIPT + + case S.ATTRIB_VALUE_QUOTED: + if (c !== parser.q) { + if (c === '&') { + parser.state = S.ATTRIB_VALUE_ENTITY_Q } else { - strictFail(parser, "Invalid tagname in closing tag.") + parser.attribValue += c } - } else parser.tagName = c - } - else if (c === ">") closeTag(parser) - else if (is(nameBody, c)) parser.tagName += c - else if (parser.script) { - parser.script += "</" + parser.tagName - parser.tagName = "" - parser.state = S.SCRIPT - } else { - if (not(whitespace, c)) strictFail(parser, - "Invalid tagname in closing tag") - parser.state = S.CLOSE_TAG_SAW_WHITE - } - continue - - case S.CLOSE_TAG_SAW_WHITE: - if (is(whitespace, c)) continue - if (c === ">") closeTag(parser) - else strictFail(parser, "Invalid characters in closing tag") - continue - - case S.TEXT_ENTITY: - case S.ATTRIB_VALUE_ENTITY_Q: - case S.ATTRIB_VALUE_ENTITY_U: - switch(parser.state) { - case S.TEXT_ENTITY: - var returnState = S.TEXT, buffer = "textNode" - break - - case S.ATTRIB_VALUE_ENTITY_Q: - var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue" - break - - case S.ATTRIB_VALUE_ENTITY_U: - var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue" - break - } - if (c === ";") { - parser[buffer] += parseEntity(parser) - parser.entity = "" - parser.state = returnState - } - else if (is(entity, c)) parser.entity += c - else { - strictFail(parser, "Invalid character entity") - parser[buffer] += "&" + parser.entity + c - parser.entity = "" - parser.state = returnState - } - continue + continue + } + attrib(parser) + parser.q = '' + parser.state = S.ATTRIB_VALUE_CLOSED + continue + + case S.ATTRIB_VALUE_CLOSED: + if (is(whitespace, c)) { + parser.state = S.ATTRIB + } else if (c === '>') { + openTag(parser) + } else if (c === '/') { + parser.state = S.OPEN_TAG_SLASH + } else if (is(nameStart, c)) { + strictFail(parser, 'No whitespace between attributes') + parser.attribName = c + parser.attribValue = '' + parser.state = S.ATTRIB_NAME + } else { + strictFail(parser, 'Invalid attribute name') + } + continue - default: - throw new Error(parser, "Unknown state: " + parser.state) + case S.ATTRIB_VALUE_UNQUOTED: + if (not(attribEnd, c)) { + if (c === '&') { + parser.state = S.ATTRIB_VALUE_ENTITY_U + } else { + parser.attribValue += c + } + continue + } + attrib(parser) + if (c === '>') { + openTag(parser) + } else { + parser.state = S.ATTRIB + } + continue + + case S.CLOSE_TAG: + if (!parser.tagName) { + if (is(whitespace, c)) { + continue + } else if (not(nameStart, c)) { + if (parser.script) { + parser.script += '</' + c + parser.state = S.SCRIPT + } else { + strictFail(parser, 'Invalid tagname in closing tag.') + } + } else { + parser.tagName = c + } + } else if (c === '>') { + closeTag(parser) + } else if (is(nameBody, c)) { + parser.tagName += c + } else if (parser.script) { + parser.script += '</' + parser.tagName + parser.tagName = '' + parser.state = S.SCRIPT + } else { + if (not(whitespace, c)) { + strictFail(parser, 'Invalid tagname in closing tag') + } + parser.state = S.CLOSE_TAG_SAW_WHITE + } + continue + + case S.CLOSE_TAG_SAW_WHITE: + if (is(whitespace, c)) { + continue + } + if (c === '>') { + closeTag(parser) + } else { + strictFail(parser, 'Invalid characters in closing tag') + } + continue + + case S.TEXT_ENTITY: + case S.ATTRIB_VALUE_ENTITY_Q: + case S.ATTRIB_VALUE_ENTITY_U: + var returnState + var buffer + switch (parser.state) { + case S.TEXT_ENTITY: + returnState = S.TEXT + buffer = 'textNode' + break + + case S.ATTRIB_VALUE_ENTITY_Q: + returnState = S.ATTRIB_VALUE_QUOTED + buffer = 'attribValue' + break + + case S.ATTRIB_VALUE_ENTITY_U: + returnState = S.ATTRIB_VALUE_UNQUOTED + buffer = 'attribValue' + break + } + + if (c === ';') { + parser[buffer] += parseEntity(parser) + parser.entity = '' + parser.state = returnState + } else if (is(parser.entity.length ? entityBody : entityStart, c)) { + parser.entity += c + } else { + strictFail(parser, 'Invalid character in entity name') + parser[buffer] += '&' + parser.entity + c + parser.entity = '' + parser.state = returnState + } + + continue + + default: + throw new Error(parser, 'Unknown state: ' + parser.state) + } + } // while + + if (parser.position >= parser.bufferCheckPosition) { + checkBufferLength(parser) } - } // while - // cdata blocks can get very big under normal conditions. emit and move on. - // if (parser.state === S.CDATA && parser.cdata) { - // emitNode(parser, "oncdata", parser.cdata) - // parser.cdata = "" - // } - if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser) - return parser -} - -/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */ -if (!String.fromCodePoint) { - (function() { - var stringFromCharCode = String.fromCharCode; - var floor = Math.floor; - var fromCodePoint = function() { - var MAX_SIZE = 0x4000; - var codeUnits = []; - var highSurrogate; - var lowSurrogate; - var index = -1; - var length = arguments.length; - if (!length) { - return ''; - } - var result = ''; - while (++index < length) { - var codePoint = Number(arguments[index]); - if ( - !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` - codePoint < 0 || // not a valid Unicode code point - codePoint > 0x10FFFF || // not a valid Unicode code point - floor(codePoint) != codePoint // not an integer - ) { - throw RangeError('Invalid code point: ' + codePoint); - } - if (codePoint <= 0xFFFF) { // BMP code point - codeUnits.push(codePoint); - } else { // Astral code point; split in surrogate halves - // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae - codePoint -= 0x10000; - highSurrogate = (codePoint >> 10) + 0xD800; - lowSurrogate = (codePoint % 0x400) + 0xDC00; - codeUnits.push(highSurrogate, lowSurrogate); - } - if (index + 1 == length || codeUnits.length > MAX_SIZE) { - result += stringFromCharCode.apply(null, codeUnits); - codeUnits.length = 0; - } - } - return result; - }; - if (Object.defineProperty) { - Object.defineProperty(String, 'fromCodePoint', { - 'value': fromCodePoint, - 'configurable': true, - 'writable': true - }); - } else { - String.fromCodePoint = fromCodePoint; - } - }()); -} + return parser + } -})(typeof exports === "undefined" ? sax = {} : exports); + /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */ + if (!String.fromCodePoint) { + (function () { + var stringFromCharCode = String.fromCharCode + var floor = Math.floor + var fromCodePoint = function () { + var MAX_SIZE = 0x4000 + var codeUnits = [] + var highSurrogate + var lowSurrogate + var index = -1 + var length = arguments.length + if (!length) { + return '' + } + var result = '' + while (++index < length) { + var codePoint = Number(arguments[index]) + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10FFFF || // not a valid Unicode code point + floor(codePoint) !== codePoint // not an integer + ) { + throw RangeError('Invalid code point: ' + codePoint) + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint) + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000 + highSurrogate = (codePoint >> 10) + 0xD800 + lowSurrogate = (codePoint % 0x400) + 0xDC00 + codeUnits.push(highSurrogate, lowSurrogate) + } + if (index + 1 === length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits) + codeUnits.length = 0 + } + } + return result + } + if (Object.defineProperty) { + Object.defineProperty(String, 'fromCodePoint', { + value: fromCodePoint, + configurable: true, + writable: true + }) + } else { + String.fromCodePoint = fromCodePoint + } + }()) + } +})(typeof exports === 'undefined' ? this.sax = {} : exports) |