aboutsummaryrefslogtreecommitdiff
path: root/node_modules/sax/lib/sax.js
diff options
context:
space:
mode:
authorFlorian Dold <florian.dold@gmail.com>2016-11-16 01:59:39 +0100
committerFlorian Dold <florian.dold@gmail.com>2016-11-16 02:00:31 +0100
commitbd65bb67e25a79b019d745b7262b2008ce2adb15 (patch)
tree89e1b032103a63737f1a703e6a943832ef261704 /node_modules/sax/lib/sax.js
parentf91466595b651721690133f58ab37f977539e95b (diff)
incrementally verify denoms
The denominations are not stored in a separate object store.
Diffstat (limited to 'node_modules/sax/lib/sax.js')
-rw-r--r--node_modules/sax/lib/sax.js2758
1 files changed, 1462 insertions, 1296 deletions
diff --git a/node_modules/sax/lib/sax.js b/node_modules/sax/lib/sax.js
index 410a50748..f125c5fee 100644
--- a/node_modules/sax/lib/sax.js
+++ b/node_modules/sax/lib/sax.js
@@ -1,1410 +1,1576 @@
-// wrapper for non-node envs
-;(function (sax) {
-
-sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
-sax.SAXParser = SAXParser
-sax.SAXStream = SAXStream
-sax.createStream = createStream
-
-// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
-// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
-// since that's the earliest that a buffer overrun could occur. This way, checks are
-// as rare as required, but as often as necessary to ensure never crossing this bound.
-// Furthermore, buffers are only tested at most once per write(), so passing a very
-// large string into write() might have undesirable effects, but this is manageable by
-// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
-// edge case, result in creating at most one complete copy of the string passed in.
-// Set to Infinity to have unlimited buffers.
-sax.MAX_BUFFER_LENGTH = 64 * 1024
-
-var buffers = [
- "comment", "sgmlDecl", "textNode", "tagName", "doctype",
- "procInstName", "procInstBody", "entity", "attribName",
- "attribValue", "cdata", "script"
-]
-
-sax.EVENTS = // for discoverability.
- [ "text"
- , "processinginstruction"
- , "sgmldeclaration"
- , "doctype"
- , "comment"
- , "attribute"
- , "opentag"
- , "closetag"
- , "opencdata"
- , "cdata"
- , "closecdata"
- , "error"
- , "end"
- , "ready"
- , "script"
- , "opennamespace"
- , "closenamespace"
+;(function (sax) { // wrapper for non-node envs
+ sax.parser = function (strict, opt) { return new SAXParser(strict, opt) }
+ sax.SAXParser = SAXParser
+ sax.SAXStream = SAXStream
+ sax.createStream = createStream
+
+ // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns.
+ // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)),
+ // since that's the earliest that a buffer overrun could occur. This way, checks are
+ // as rare as required, but as often as necessary to ensure never crossing this bound.
+ // Furthermore, buffers are only tested at most once per write(), so passing a very
+ // large string into write() might have undesirable effects, but this is manageable by
+ // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme
+ // edge case, result in creating at most one complete copy of the string passed in.
+ // Set to Infinity to have unlimited buffers.
+ sax.MAX_BUFFER_LENGTH = 64 * 1024
+
+ var buffers = [
+ 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype',
+ 'procInstName', 'procInstBody', 'entity', 'attribName',
+ 'attribValue', 'cdata', 'script'
]
-function SAXParser (strict, opt) {
- if (!(this instanceof SAXParser)) return new SAXParser(strict, opt)
-
- var parser = this
- clearBuffers(parser)
- parser.q = parser.c = ""
- parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
- parser.opt = opt || {}
- parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
- parser.looseCase = parser.opt.lowercase ? "toLowerCase" : "toUpperCase"
- parser.tags = []
- parser.closed = parser.closedRoot = parser.sawRoot = false
- parser.tag = parser.error = null
- parser.strict = !!strict
- parser.noscript = !!(strict || parser.opt.noscript)
- parser.state = S.BEGIN
- parser.ENTITIES = Object.create(sax.ENTITIES)
- parser.attribList = []
-
- // namespaces form a prototype chain.
- // it always points at the current tag,
- // which protos to its parent tag.
- if (parser.opt.xmlns) parser.ns = Object.create(rootNS)
-
- // mostly just for error reporting
- parser.trackPosition = parser.opt.position !== false
- if (parser.trackPosition) {
- parser.position = parser.line = parser.column = 0
- }
- emit(parser, "onready")
-}
-
-if (!Object.create) Object.create = function (o) {
- function f () { this.__proto__ = o }
- f.prototype = o
- return new f
-}
-
-if (!Object.getPrototypeOf) Object.getPrototypeOf = function (o) {
- return o.__proto__
-}
-
-if (!Object.keys) Object.keys = function (o) {
- var a = []
- for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
- return a
-}
-
-function checkBufferLength (parser) {
- var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
- , maxActual = 0
- for (var i = 0, l = buffers.length; i < l; i ++) {
- var len = parser[buffers[i]].length
- if (len > maxAllowed) {
- // Text/cdata nodes can get big, and since they're buffered,
- // we can get here under normal conditions.
- // Avoid issues by emitting the text node now,
- // so at least it won't get any bigger.
- switch (buffers[i]) {
- case "textNode":
- closeText(parser)
- break
+ sax.EVENTS = [
+ 'text',
+ 'processinginstruction',
+ 'sgmldeclaration',
+ 'doctype',
+ 'comment',
+ 'opentagstart',
+ 'attribute',
+ 'opentag',
+ 'closetag',
+ 'opencdata',
+ 'cdata',
+ 'closecdata',
+ 'error',
+ 'end',
+ 'ready',
+ 'script',
+ 'opennamespace',
+ 'closenamespace'
+ ]
- case "cdata":
- emitNode(parser, "oncdata", parser.cdata)
- parser.cdata = ""
- break
+ function SAXParser (strict, opt) {
+ if (!(this instanceof SAXParser)) {
+ return new SAXParser(strict, opt)
+ }
- case "script":
- emitNode(parser, "onscript", parser.script)
- parser.script = ""
- break
+ var parser = this
+ clearBuffers(parser)
+ parser.q = parser.c = ''
+ parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
+ parser.opt = opt || {}
+ parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags
+ parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase'
+ parser.tags = []
+ parser.closed = parser.closedRoot = parser.sawRoot = false
+ parser.tag = parser.error = null
+ parser.strict = !!strict
+ parser.noscript = !!(strict || parser.opt.noscript)
+ parser.state = S.BEGIN
+ parser.strictEntities = parser.opt.strictEntities
+ parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES)
+ parser.attribList = []
+
+ // namespaces form a prototype chain.
+ // it always points at the current tag,
+ // which protos to its parent tag.
+ if (parser.opt.xmlns) {
+ parser.ns = Object.create(rootNS)
+ }
- default:
- error(parser, "Max buffer length exceeded: "+buffers[i])
- }
+ // mostly just for error reporting
+ parser.trackPosition = parser.opt.position !== false
+ if (parser.trackPosition) {
+ parser.position = parser.line = parser.column = 0
}
- maxActual = Math.max(maxActual, len)
- }
- // schedule the next check for the earliest possible buffer overrun.
- parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual)
- + parser.position
-}
-
-function clearBuffers (parser) {
- for (var i = 0, l = buffers.length; i < l; i ++) {
- parser[buffers[i]] = ""
+ emit(parser, 'onready')
}
-}
-function flushBuffers (parser) {
- closeText(parser)
- if (parser.cdata !== "") {
- emitNode(parser, "oncdata", parser.cdata)
- parser.cdata = ""
+ if (!Object.create) {
+ Object.create = function (o) {
+ function F () {}
+ F.prototype = o
+ var newf = new F()
+ return newf
+ }
}
- if (parser.script !== "") {
- emitNode(parser, "onscript", parser.script)
- parser.script = ""
+
+ if (!Object.keys) {
+ Object.keys = function (o) {
+ var a = []
+ for (var i in o) if (o.hasOwnProperty(i)) a.push(i)
+ return a
+ }
}
-}
-
-SAXParser.prototype =
- { end: function () { end(this) }
- , write: write
- , resume: function () { this.error = null; return this }
- , close: function () { return this.write(null) }
- , flush: function () { flushBuffers(this) }
+
+ function checkBufferLength (parser) {
+ var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10)
+ var maxActual = 0
+ for (var i = 0, l = buffers.length; i < l; i++) {
+ var len = parser[buffers[i]].length
+ if (len > maxAllowed) {
+ // Text/cdata nodes can get big, and since they're buffered,
+ // we can get here under normal conditions.
+ // Avoid issues by emitting the text node now,
+ // so at least it won't get any bigger.
+ switch (buffers[i]) {
+ case 'textNode':
+ closeText(parser)
+ break
+
+ case 'cdata':
+ emitNode(parser, 'oncdata', parser.cdata)
+ parser.cdata = ''
+ break
+
+ case 'script':
+ emitNode(parser, 'onscript', parser.script)
+ parser.script = ''
+ break
+
+ default:
+ error(parser, 'Max buffer length exceeded: ' + buffers[i])
+ }
+ }
+ maxActual = Math.max(maxActual, len)
+ }
+ // schedule the next check for the earliest possible buffer overrun.
+ var m = sax.MAX_BUFFER_LENGTH - maxActual
+ parser.bufferCheckPosition = m + parser.position
}
-try {
- var Stream = require("stream").Stream
-} catch (ex) {
- var Stream = function () {}
-}
+ function clearBuffers (parser) {
+ for (var i = 0, l = buffers.length; i < l; i++) {
+ parser[buffers[i]] = ''
+ }
+ }
+ function flushBuffers (parser) {
+ closeText(parser)
+ if (parser.cdata !== '') {
+ emitNode(parser, 'oncdata', parser.cdata)
+ parser.cdata = ''
+ }
+ if (parser.script !== '') {
+ emitNode(parser, 'onscript', parser.script)
+ parser.script = ''
+ }
+ }
-var streamWraps = sax.EVENTS.filter(function (ev) {
- return ev !== "error" && ev !== "end"
-})
+ SAXParser.prototype = {
+ end: function () { end(this) },
+ write: write,
+ resume: function () { this.error = null; return this },
+ close: function () { return this.write(null) },
+ flush: function () { flushBuffers(this) }
+ }
-function createStream (strict, opt) {
- return new SAXStream(strict, opt)
-}
+ var Stream
+ try {
+ Stream = require('stream').Stream
+ } catch (ex) {
+ Stream = function () {}
+ }
-function SAXStream (strict, opt) {
- if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)
+ var streamWraps = sax.EVENTS.filter(function (ev) {
+ return ev !== 'error' && ev !== 'end'
+ })
- Stream.apply(this)
+ function createStream (strict, opt) {
+ return new SAXStream(strict, opt)
+ }
- this._parser = new SAXParser(strict, opt)
- this.writable = true
- this.readable = true
+ function SAXStream (strict, opt) {
+ if (!(this instanceof SAXStream)) {
+ return new SAXStream(strict, opt)
+ }
+ Stream.apply(this)
- var me = this
+ this._parser = new SAXParser(strict, opt)
+ this.writable = true
+ this.readable = true
- this._parser.onend = function () {
- me.emit("end")
- }
+ var me = this
- this._parser.onerror = function (er) {
- me.emit("error", er)
+ this._parser.onend = function () {
+ me.emit('end')
+ }
- // if didn't throw, then means error was handled.
- // go ahead and clear error, so we can write again.
- me._parser.error = null
- }
+ this._parser.onerror = function (er) {
+ me.emit('error', er)
- this._decoder = null;
+ // if didn't throw, then means error was handled.
+ // go ahead and clear error, so we can write again.
+ me._parser.error = null
+ }
- streamWraps.forEach(function (ev) {
- Object.defineProperty(me, "on" + ev, {
- get: function () { return me._parser["on" + ev] },
- set: function (h) {
- if (!h) {
- me.removeAllListeners(ev)
- return me._parser["on"+ev] = h
- }
- me.on(ev, h)
- },
- enumerable: true,
- configurable: false
+ this._decoder = null
+
+ streamWraps.forEach(function (ev) {
+ Object.defineProperty(me, 'on' + ev, {
+ get: function () {
+ return me._parser['on' + ev]
+ },
+ set: function (h) {
+ if (!h) {
+ me.removeAllListeners(ev)
+ me._parser['on' + ev] = h
+ return h
+ }
+ me.on(ev, h)
+ },
+ enumerable: true,
+ configurable: false
+ })
})
- })
-}
+ }
-SAXStream.prototype = Object.create(Stream.prototype,
- { constructor: { value: SAXStream } })
+ SAXStream.prototype = Object.create(Stream.prototype, {
+ constructor: {
+ value: SAXStream
+ }
+ })
-SAXStream.prototype.write = function (data) {
- if (typeof Buffer === 'function' &&
+ SAXStream.prototype.write = function (data) {
+ if (typeof Buffer === 'function' &&
typeof Buffer.isBuffer === 'function' &&
Buffer.isBuffer(data)) {
- if (!this._decoder) {
- var SD = require('string_decoder').StringDecoder
- this._decoder = new SD('utf8')
+ if (!this._decoder) {
+ var SD = require('string_decoder').StringDecoder
+ this._decoder = new SD('utf8')
+ }
+ data = this._decoder.write(data)
}
- data = this._decoder.write(data);
+
+ this._parser.write(data.toString())
+ this.emit('data', data)
+ return true
}
- this._parser.write(data.toString())
- this.emit("data", data)
- return true
-}
-
-SAXStream.prototype.end = function (chunk) {
- if (chunk && chunk.length) this.write(chunk)
- this._parser.end()
- return true
-}
-
-SAXStream.prototype.on = function (ev, handler) {
- var me = this
- if (!me._parser["on"+ev] && streamWraps.indexOf(ev) !== -1) {
- me._parser["on"+ev] = function () {
- var args = arguments.length === 1 ? [arguments[0]]
- : Array.apply(null, arguments)
- args.splice(0, 0, ev)
- me.emit.apply(me, args)
+ SAXStream.prototype.end = function (chunk) {
+ if (chunk && chunk.length) {
+ this.write(chunk)
}
+ this._parser.end()
+ return true
}
- return Stream.prototype.on.call(me, ev, handler)
-}
+ SAXStream.prototype.on = function (ev, handler) {
+ var me = this
+ if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) {
+ me._parser['on' + ev] = function () {
+ var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments)
+ args.splice(0, 0, ev)
+ me.emit.apply(me, args)
+ }
+ }
+ return Stream.prototype.on.call(me, ev, handler)
+ }
+ // character classes and tokens
+ var whitespace = '\r\n\t '
-// character classes and tokens
-var whitespace = "\r\n\t "
// this really needs to be replaced with character classes.
// XML allows all manner of ridiculous numbers and digits.
- , number = "0124356789"
- , letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ var number = '0124356789'
+ var letter = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
// (Letter | "_" | ":")
- , quote = "'\""
- , entity = number+letter+"#"
- , attribEnd = whitespace + ">"
- , CDATA = "[CDATA["
- , DOCTYPE = "DOCTYPE"
- , XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
- , XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
- , rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
-
-// turn all the string character sets into character class objects.
-whitespace = charClass(whitespace)
-number = charClass(number)
-letter = charClass(letter)
-
-// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
-// This implementation works on strings, a single character at a time
-// as such, it cannot ever support astral-plane characters (10000-EFFFF)
-// without a significant breaking change to either this parser, or the
-// JavaScript language. Implementation of an emoji-capable xml parser
-// is left as an exercise for the reader.
-var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
-
-var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
-
-quote = charClass(quote)
-entity = charClass(entity)
-attribEnd = charClass(attribEnd)
-
-function charClass (str) {
- return str.split("").reduce(function (s, c) {
- s[c] = true
- return s
- }, {})
-}
-
-function isRegExp (c) {
- return Object.prototype.toString.call(c) === '[object RegExp]'
-}
-
-function is (charclass, c) {
- return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
-}
-
-function not (charclass, c) {
- return !is(charclass, c)
-}
-
-var S = 0
-sax.STATE =
-{ BEGIN : S++
-, TEXT : S++ // general stuff
-, TEXT_ENTITY : S++ // &amp and such.
-, OPEN_WAKA : S++ // <
-, SGML_DECL : S++ // <!BLARG
-, SGML_DECL_QUOTED : S++ // <!BLARG foo "bar
-, DOCTYPE : S++ // <!DOCTYPE
-, DOCTYPE_QUOTED : S++ // <!DOCTYPE "//blah
-, DOCTYPE_DTD : S++ // <!DOCTYPE "//blah" [ ...
-, DOCTYPE_DTD_QUOTED : S++ // <!DOCTYPE "//blah" [ "foo
-, COMMENT_STARTING : S++ // <!-
-, COMMENT : S++ // <!--
-, COMMENT_ENDING : S++ // <!-- blah -
-, COMMENT_ENDED : S++ // <!-- blah --
-, CDATA : S++ // <![CDATA[ something
-, CDATA_ENDING : S++ // ]
-, CDATA_ENDING_2 : S++ // ]]
-, PROC_INST : S++ // <?hi
-, PROC_INST_BODY : S++ // <?hi there
-, PROC_INST_ENDING : S++ // <?hi "there" ?
-, OPEN_TAG : S++ // <strong
-, OPEN_TAG_SLASH : S++ // <strong /
-, ATTRIB : S++ // <a
-, ATTRIB_NAME : S++ // <a foo
-, ATTRIB_NAME_SAW_WHITE : S++ // <a foo _
-, ATTRIB_VALUE : S++ // <a foo=
-, ATTRIB_VALUE_QUOTED : S++ // <a foo="bar
-, ATTRIB_VALUE_CLOSED : S++ // <a foo="bar"
-, ATTRIB_VALUE_UNQUOTED : S++ // <a foo=bar
-, ATTRIB_VALUE_ENTITY_Q : S++ // <foo bar="&quot;"
-, ATTRIB_VALUE_ENTITY_U : S++ // <foo bar=&quot;
-, CLOSE_TAG : S++ // </a
-, CLOSE_TAG_SAW_WHITE : S++ // </a >
-, SCRIPT : S++ // <script> ...
-, SCRIPT_ENDING : S++ // <script> ... <
-}
-
-sax.ENTITIES =
-{ "amp" : "&"
-, "gt" : ">"
-, "lt" : "<"
-, "quot" : "\""
-, "apos" : "'"
-, "AElig" : 198
-, "Aacute" : 193
-, "Acirc" : 194
-, "Agrave" : 192
-, "Aring" : 197
-, "Atilde" : 195
-, "Auml" : 196
-, "Ccedil" : 199
-, "ETH" : 208
-, "Eacute" : 201
-, "Ecirc" : 202
-, "Egrave" : 200
-, "Euml" : 203
-, "Iacute" : 205
-, "Icirc" : 206
-, "Igrave" : 204
-, "Iuml" : 207
-, "Ntilde" : 209
-, "Oacute" : 211
-, "Ocirc" : 212
-, "Ograve" : 210
-, "Oslash" : 216
-, "Otilde" : 213
-, "Ouml" : 214
-, "THORN" : 222
-, "Uacute" : 218
-, "Ucirc" : 219
-, "Ugrave" : 217
-, "Uuml" : 220
-, "Yacute" : 221
-, "aacute" : 225
-, "acirc" : 226
-, "aelig" : 230
-, "agrave" : 224
-, "aring" : 229
-, "atilde" : 227
-, "auml" : 228
-, "ccedil" : 231
-, "eacute" : 233
-, "ecirc" : 234
-, "egrave" : 232
-, "eth" : 240
-, "euml" : 235
-, "iacute" : 237
-, "icirc" : 238
-, "igrave" : 236
-, "iuml" : 239
-, "ntilde" : 241
-, "oacute" : 243
-, "ocirc" : 244
-, "ograve" : 242
-, "oslash" : 248
-, "otilde" : 245
-, "ouml" : 246
-, "szlig" : 223
-, "thorn" : 254
-, "uacute" : 250
-, "ucirc" : 251
-, "ugrave" : 249
-, "uuml" : 252
-, "yacute" : 253
-, "yuml" : 255
-, "copy" : 169
-, "reg" : 174
-, "nbsp" : 160
-, "iexcl" : 161
-, "cent" : 162
-, "pound" : 163
-, "curren" : 164
-, "yen" : 165
-, "brvbar" : 166
-, "sect" : 167
-, "uml" : 168
-, "ordf" : 170
-, "laquo" : 171
-, "not" : 172
-, "shy" : 173
-, "macr" : 175
-, "deg" : 176
-, "plusmn" : 177
-, "sup1" : 185
-, "sup2" : 178
-, "sup3" : 179
-, "acute" : 180
-, "micro" : 181
-, "para" : 182
-, "middot" : 183
-, "cedil" : 184
-, "ordm" : 186
-, "raquo" : 187
-, "frac14" : 188
-, "frac12" : 189
-, "frac34" : 190
-, "iquest" : 191
-, "times" : 215
-, "divide" : 247
-, "OElig" : 338
-, "oelig" : 339
-, "Scaron" : 352
-, "scaron" : 353
-, "Yuml" : 376
-, "fnof" : 402
-, "circ" : 710
-, "tilde" : 732
-, "Alpha" : 913
-, "Beta" : 914
-, "Gamma" : 915
-, "Delta" : 916
-, "Epsilon" : 917
-, "Zeta" : 918
-, "Eta" : 919
-, "Theta" : 920
-, "Iota" : 921
-, "Kappa" : 922
-, "Lambda" : 923
-, "Mu" : 924
-, "Nu" : 925
-, "Xi" : 926
-, "Omicron" : 927
-, "Pi" : 928
-, "Rho" : 929
-, "Sigma" : 931
-, "Tau" : 932
-, "Upsilon" : 933
-, "Phi" : 934
-, "Chi" : 935
-, "Psi" : 936
-, "Omega" : 937
-, "alpha" : 945
-, "beta" : 946
-, "gamma" : 947
-, "delta" : 948
-, "epsilon" : 949
-, "zeta" : 950
-, "eta" : 951
-, "theta" : 952
-, "iota" : 953
-, "kappa" : 954
-, "lambda" : 955
-, "mu" : 956
-, "nu" : 957
-, "xi" : 958
-, "omicron" : 959
-, "pi" : 960
-, "rho" : 961
-, "sigmaf" : 962
-, "sigma" : 963
-, "tau" : 964
-, "upsilon" : 965
-, "phi" : 966
-, "chi" : 967
-, "psi" : 968
-, "omega" : 969
-, "thetasym" : 977
-, "upsih" : 978
-, "piv" : 982
-, "ensp" : 8194
-, "emsp" : 8195
-, "thinsp" : 8201
-, "zwnj" : 8204
-, "zwj" : 8205
-, "lrm" : 8206
-, "rlm" : 8207
-, "ndash" : 8211
-, "mdash" : 8212
-, "lsquo" : 8216
-, "rsquo" : 8217
-, "sbquo" : 8218
-, "ldquo" : 8220
-, "rdquo" : 8221
-, "bdquo" : 8222
-, "dagger" : 8224
-, "Dagger" : 8225
-, "bull" : 8226
-, "hellip" : 8230
-, "permil" : 8240
-, "prime" : 8242
-, "Prime" : 8243
-, "lsaquo" : 8249
-, "rsaquo" : 8250
-, "oline" : 8254
-, "frasl" : 8260
-, "euro" : 8364
-, "image" : 8465
-, "weierp" : 8472
-, "real" : 8476
-, "trade" : 8482
-, "alefsym" : 8501
-, "larr" : 8592
-, "uarr" : 8593
-, "rarr" : 8594
-, "darr" : 8595
-, "harr" : 8596
-, "crarr" : 8629
-, "lArr" : 8656
-, "uArr" : 8657
-, "rArr" : 8658
-, "dArr" : 8659
-, "hArr" : 8660
-, "forall" : 8704
-, "part" : 8706
-, "exist" : 8707
-, "empty" : 8709
-, "nabla" : 8711
-, "isin" : 8712
-, "notin" : 8713
-, "ni" : 8715
-, "prod" : 8719
-, "sum" : 8721
-, "minus" : 8722
-, "lowast" : 8727
-, "radic" : 8730
-, "prop" : 8733
-, "infin" : 8734
-, "ang" : 8736
-, "and" : 8743
-, "or" : 8744
-, "cap" : 8745
-, "cup" : 8746
-, "int" : 8747
-, "there4" : 8756
-, "sim" : 8764
-, "cong" : 8773
-, "asymp" : 8776
-, "ne" : 8800
-, "equiv" : 8801
-, "le" : 8804
-, "ge" : 8805
-, "sub" : 8834
-, "sup" : 8835
-, "nsub" : 8836
-, "sube" : 8838
-, "supe" : 8839
-, "oplus" : 8853
-, "otimes" : 8855
-, "perp" : 8869
-, "sdot" : 8901
-, "lceil" : 8968
-, "rceil" : 8969
-, "lfloor" : 8970
-, "rfloor" : 8971
-, "lang" : 9001
-, "rang" : 9002
-, "loz" : 9674
-, "spades" : 9824
-, "clubs" : 9827
-, "hearts" : 9829
-, "diams" : 9830
-}
-
-Object.keys(sax.ENTITIES).forEach(function (key) {
+ var quote = '\'"'
+ var attribEnd = whitespace + '>'
+ var CDATA = '[CDATA['
+ var DOCTYPE = 'DOCTYPE'
+ var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
+ var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
+ var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }
+
+ // turn all the string character sets into character class objects.
+ whitespace = charClass(whitespace)
+ number = charClass(number)
+ letter = charClass(letter)
+
+ // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
+ // This implementation works on strings, a single character at a time
+ // as such, it cannot ever support astral-plane characters (10000-EFFFF)
+ // without a significant breaking change to either this parser, or the
+ // JavaScript language. Implementation of an emoji-capable xml parser
+ // is left as an exercise for the reader.
+ var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
+
+ var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
+
+ var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
+ var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
+
+ quote = charClass(quote)
+ attribEnd = charClass(attribEnd)
+
+ function charClass (str) {
+ return str.split('').reduce(function (s, c) {
+ s[c] = true
+ return s
+ }, {})
+ }
+
+ function isRegExp (c) {
+ return Object.prototype.toString.call(c) === '[object RegExp]'
+ }
+
+ function is (charclass, c) {
+ return isRegExp(charclass) ? !!c.match(charclass) : charclass[c]
+ }
+
+ function not (charclass, c) {
+ return !is(charclass, c)
+ }
+
+ var S = 0
+ sax.STATE = {
+ BEGIN: S++, // leading byte order mark or whitespace
+ BEGIN_WHITESPACE: S++, // leading whitespace
+ TEXT: S++, // general stuff
+ TEXT_ENTITY: S++, // &amp and such.
+ OPEN_WAKA: S++, // <
+ SGML_DECL: S++, // <!BLARG
+ SGML_DECL_QUOTED: S++, // <!BLARG foo "bar
+ DOCTYPE: S++, // <!DOCTYPE
+ DOCTYPE_QUOTED: S++, // <!DOCTYPE "//blah
+ DOCTYPE_DTD: S++, // <!DOCTYPE "//blah" [ ...
+ DOCTYPE_DTD_QUOTED: S++, // <!DOCTYPE "//blah" [ "foo
+ COMMENT_STARTING: S++, // <!-
+ COMMENT: S++, // <!--
+ COMMENT_ENDING: S++, // <!-- blah -
+ COMMENT_ENDED: S++, // <!-- blah --
+ CDATA: S++, // <![CDATA[ something
+ CDATA_ENDING: S++, // ]
+ CDATA_ENDING_2: S++, // ]]
+ PROC_INST: S++, // <?hi
+ PROC_INST_BODY: S++, // <?hi there
+ PROC_INST_ENDING: S++, // <?hi "there" ?
+ OPEN_TAG: S++, // <strong
+ OPEN_TAG_SLASH: S++, // <strong /
+ ATTRIB: S++, // <a
+ ATTRIB_NAME: S++, // <a foo
+ ATTRIB_NAME_SAW_WHITE: S++, // <a foo _
+ ATTRIB_VALUE: S++, // <a foo=
+ ATTRIB_VALUE_QUOTED: S++, // <a foo="bar
+ ATTRIB_VALUE_CLOSED: S++, // <a foo="bar"
+ ATTRIB_VALUE_UNQUOTED: S++, // <a foo=bar
+ ATTRIB_VALUE_ENTITY_Q: S++, // <foo bar="&quot;"
+ ATTRIB_VALUE_ENTITY_U: S++, // <foo bar=&quot
+ CLOSE_TAG: S++, // </a
+ CLOSE_TAG_SAW_WHITE: S++, // </a >
+ SCRIPT: S++, // <script> ...
+ SCRIPT_ENDING: S++ // <script> ... <
+ }
+
+ sax.XML_ENTITIES = {
+ 'amp': '&',
+ 'gt': '>',
+ 'lt': '<',
+ 'quot': '"',
+ 'apos': "'"
+ }
+
+ sax.ENTITIES = {
+ 'amp': '&',
+ 'gt': '>',
+ 'lt': '<',
+ 'quot': '"',
+ 'apos': "'",
+ 'AElig': 198,
+ 'Aacute': 193,
+ 'Acirc': 194,
+ 'Agrave': 192,
+ 'Aring': 197,
+ 'Atilde': 195,
+ 'Auml': 196,
+ 'Ccedil': 199,
+ 'ETH': 208,
+ 'Eacute': 201,
+ 'Ecirc': 202,
+ 'Egrave': 200,
+ 'Euml': 203,
+ 'Iacute': 205,
+ 'Icirc': 206,
+ 'Igrave': 204,
+ 'Iuml': 207,
+ 'Ntilde': 209,
+ 'Oacute': 211,
+ 'Ocirc': 212,
+ 'Ograve': 210,
+ 'Oslash': 216,
+ 'Otilde': 213,
+ 'Ouml': 214,
+ 'THORN': 222,
+ 'Uacute': 218,
+ 'Ucirc': 219,
+ 'Ugrave': 217,
+ 'Uuml': 220,
+ 'Yacute': 221,
+ 'aacute': 225,
+ 'acirc': 226,
+ 'aelig': 230,
+ 'agrave': 224,
+ 'aring': 229,
+ 'atilde': 227,
+ 'auml': 228,
+ 'ccedil': 231,
+ 'eacute': 233,
+ 'ecirc': 234,
+ 'egrave': 232,
+ 'eth': 240,
+ 'euml': 235,
+ 'iacute': 237,
+ 'icirc': 238,
+ 'igrave': 236,
+ 'iuml': 239,
+ 'ntilde': 241,
+ 'oacute': 243,
+ 'ocirc': 244,
+ 'ograve': 242,
+ 'oslash': 248,
+ 'otilde': 245,
+ 'ouml': 246,
+ 'szlig': 223,
+ 'thorn': 254,
+ 'uacute': 250,
+ 'ucirc': 251,
+ 'ugrave': 249,
+ 'uuml': 252,
+ 'yacute': 253,
+ 'yuml': 255,
+ 'copy': 169,
+ 'reg': 174,
+ 'nbsp': 160,
+ 'iexcl': 161,
+ 'cent': 162,
+ 'pound': 163,
+ 'curren': 164,
+ 'yen': 165,
+ 'brvbar': 166,
+ 'sect': 167,
+ 'uml': 168,
+ 'ordf': 170,
+ 'laquo': 171,
+ 'not': 172,
+ 'shy': 173,
+ 'macr': 175,
+ 'deg': 176,
+ 'plusmn': 177,
+ 'sup1': 185,
+ 'sup2': 178,
+ 'sup3': 179,
+ 'acute': 180,
+ 'micro': 181,
+ 'para': 182,
+ 'middot': 183,
+ 'cedil': 184,
+ 'ordm': 186,
+ 'raquo': 187,
+ 'frac14': 188,
+ 'frac12': 189,
+ 'frac34': 190,
+ 'iquest': 191,
+ 'times': 215,
+ 'divide': 247,
+ 'OElig': 338,
+ 'oelig': 339,
+ 'Scaron': 352,
+ 'scaron': 353,
+ 'Yuml': 376,
+ 'fnof': 402,
+ 'circ': 710,
+ 'tilde': 732,
+ 'Alpha': 913,
+ 'Beta': 914,
+ 'Gamma': 915,
+ 'Delta': 916,
+ 'Epsilon': 917,
+ 'Zeta': 918,
+ 'Eta': 919,
+ 'Theta': 920,
+ 'Iota': 921,
+ 'Kappa': 922,
+ 'Lambda': 923,
+ 'Mu': 924,
+ 'Nu': 925,
+ 'Xi': 926,
+ 'Omicron': 927,
+ 'Pi': 928,
+ 'Rho': 929,
+ 'Sigma': 931,
+ 'Tau': 932,
+ 'Upsilon': 933,
+ 'Phi': 934,
+ 'Chi': 935,
+ 'Psi': 936,
+ 'Omega': 937,
+ 'alpha': 945,
+ 'beta': 946,
+ 'gamma': 947,
+ 'delta': 948,
+ 'epsilon': 949,
+ 'zeta': 950,
+ 'eta': 951,
+ 'theta': 952,
+ 'iota': 953,
+ 'kappa': 954,
+ 'lambda': 955,
+ 'mu': 956,
+ 'nu': 957,
+ 'xi': 958,
+ 'omicron': 959,
+ 'pi': 960,
+ 'rho': 961,
+ 'sigmaf': 962,
+ 'sigma': 963,
+ 'tau': 964,
+ 'upsilon': 965,
+ 'phi': 966,
+ 'chi': 967,
+ 'psi': 968,
+ 'omega': 969,
+ 'thetasym': 977,
+ 'upsih': 978,
+ 'piv': 982,
+ 'ensp': 8194,
+ 'emsp': 8195,
+ 'thinsp': 8201,
+ 'zwnj': 8204,
+ 'zwj': 8205,
+ 'lrm': 8206,
+ 'rlm': 8207,
+ 'ndash': 8211,
+ 'mdash': 8212,
+ 'lsquo': 8216,
+ 'rsquo': 8217,
+ 'sbquo': 8218,
+ 'ldquo': 8220,
+ 'rdquo': 8221,
+ 'bdquo': 8222,
+ 'dagger': 8224,
+ 'Dagger': 8225,
+ 'bull': 8226,
+ 'hellip': 8230,
+ 'permil': 8240,
+ 'prime': 8242,
+ 'Prime': 8243,
+ 'lsaquo': 8249,
+ 'rsaquo': 8250,
+ 'oline': 8254,
+ 'frasl': 8260,
+ 'euro': 8364,
+ 'image': 8465,
+ 'weierp': 8472,
+ 'real': 8476,
+ 'trade': 8482,
+ 'alefsym': 8501,
+ 'larr': 8592,
+ 'uarr': 8593,
+ 'rarr': 8594,
+ 'darr': 8595,
+ 'harr': 8596,
+ 'crarr': 8629,
+ 'lArr': 8656,
+ 'uArr': 8657,
+ 'rArr': 8658,
+ 'dArr': 8659,
+ 'hArr': 8660,
+ 'forall': 8704,
+ 'part': 8706,
+ 'exist': 8707,
+ 'empty': 8709,
+ 'nabla': 8711,
+ 'isin': 8712,
+ 'notin': 8713,
+ 'ni': 8715,
+ 'prod': 8719,
+ 'sum': 8721,
+ 'minus': 8722,
+ 'lowast': 8727,
+ 'radic': 8730,
+ 'prop': 8733,
+ 'infin': 8734,
+ 'ang': 8736,
+ 'and': 8743,
+ 'or': 8744,
+ 'cap': 8745,
+ 'cup': 8746,
+ 'int': 8747,
+ 'there4': 8756,
+ 'sim': 8764,
+ 'cong': 8773,
+ 'asymp': 8776,
+ 'ne': 8800,
+ 'equiv': 8801,
+ 'le': 8804,
+ 'ge': 8805,
+ 'sub': 8834,
+ 'sup': 8835,
+ 'nsub': 8836,
+ 'sube': 8838,
+ 'supe': 8839,
+ 'oplus': 8853,
+ 'otimes': 8855,
+ 'perp': 8869,
+ 'sdot': 8901,
+ 'lceil': 8968,
+ 'rceil': 8969,
+ 'lfloor': 8970,
+ 'rfloor': 8971,
+ 'lang': 9001,
+ 'rang': 9002,
+ 'loz': 9674,
+ 'spades': 9824,
+ 'clubs': 9827,
+ 'hearts': 9829,
+ 'diams': 9830
+ }
+
+ Object.keys(sax.ENTITIES).forEach(function (key) {
var e = sax.ENTITIES[key]
var s = typeof e === 'number' ? String.fromCharCode(e) : e
sax.ENTITIES[key] = s
-})
-
-for (var S in sax.STATE) sax.STATE[sax.STATE[S]] = S
-
-// shorthand
-S = sax.STATE
-
-function emit (parser, event, data) {
- parser[event] && parser[event](data)
-}
-
-function emitNode (parser, nodeType, data) {
- if (parser.textNode) closeText(parser)
- emit(parser, nodeType, data)
-}
-
-function closeText (parser) {
- parser.textNode = textopts(parser.opt, parser.textNode)
- if (parser.textNode) emit(parser, "ontext", parser.textNode)
- parser.textNode = ""
-}
-
-function textopts (opt, text) {
- if (opt.trim) text = text.trim()
- if (opt.normalize) text = text.replace(/\s+/g, " ")
- return text
-}
-
-function error (parser, er) {
- closeText(parser)
- if (parser.trackPosition) {
- er += "\nLine: "+parser.line+
- "\nColumn: "+parser.column+
- "\nChar: "+parser.c
+ })
+
+ for (var s in sax.STATE) {
+ sax.STATE[sax.STATE[s]] = s
}
- er = new Error(er)
- parser.error = er
- emit(parser, "onerror", er)
- return parser
-}
-
-function end (parser) {
- if (!parser.closedRoot) strictFail(parser, "Unclosed root tag")
- if ((parser.state !== S.BEGIN) && (parser.state !== S.TEXT)) error(parser, "Unexpected end")
- closeText(parser)
- parser.c = ""
- parser.closed = true
- emit(parser, "onend")
- SAXParser.call(parser, parser.strict, parser.opt)
- return parser
-}
-
-function strictFail (parser, message) {
- if (typeof parser !== 'object' || !(parser instanceof SAXParser))
- throw new Error('bad call to strictFail');
- if (parser.strict) error(parser, message)
-}
-
-function newTag (parser) {
- if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
- var parent = parser.tags[parser.tags.length - 1] || parser
- , tag = parser.tag = { name : parser.tagName, attributes : {} }
-
- // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar"
- if (parser.opt.xmlns) tag.ns = parent.ns
- parser.attribList.length = 0
-}
-
-function qname (name, attribute) {
- var i = name.indexOf(":")
- , qualName = i < 0 ? [ "", name ] : name.split(":")
- , prefix = qualName[0]
- , local = qualName[1]
-
- // <x "xmlns"="http://foo">
- if (attribute && name === "xmlns") {
- prefix = "xmlns"
- local = ""
+
+ // shorthand
+ S = sax.STATE
+
+ function emit (parser, event, data) {
+ parser[event] && parser[event](data)
}
- return { prefix: prefix, local: local }
-}
+ function emitNode (parser, nodeType, data) {
+ if (parser.textNode) closeText(parser)
+ emit(parser, nodeType, data)
+ }
-function attrib (parser) {
- if (!parser.strict) parser.attribName = parser.attribName[parser.looseCase]()
+ function closeText (parser) {
+ parser.textNode = textopts(parser.opt, parser.textNode)
+ if (parser.textNode) emit(parser, 'ontext', parser.textNode)
+ parser.textNode = ''
+ }
- if (parser.attribList.indexOf(parser.attribName) !== -1 ||
- parser.tag.attributes.hasOwnProperty(parser.attribName)) {
- return parser.attribName = parser.attribValue = ""
+ function textopts (opt, text) {
+ if (opt.trim) text = text.trim()
+ if (opt.normalize) text = text.replace(/\s+/g, ' ')
+ return text
}
- if (parser.opt.xmlns) {
- var qn = qname(parser.attribName, true)
- , prefix = qn.prefix
- , local = qn.local
-
- if (prefix === "xmlns") {
- // namespace binding attribute; push the binding into scope
- if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
- strictFail( parser
- , "xml: prefix must be bound to " + XML_NAMESPACE + "\n"
- + "Actual: " + parser.attribValue )
- } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
- strictFail( parser
- , "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n"
- + "Actual: " + parser.attribValue )
- } else {
- var tag = parser.tag
- , parent = parser.tags[parser.tags.length - 1] || parser
- if (tag.ns === parent.ns) {
- tag.ns = Object.create(parent.ns)
- }
- tag.ns[local] = parser.attribValue
- }
+ function error (parser, er) {
+ closeText(parser)
+ if (parser.trackPosition) {
+ er += '\nLine: ' + parser.line +
+ '\nColumn: ' + parser.column +
+ '\nChar: ' + parser.c
}
+ er = new Error(er)
+ parser.error = er
+ emit(parser, 'onerror', er)
+ return parser
+ }
- // defer onattribute events until all attributes have been seen
- // so any new bindings can take effect; preserve attribute order
- // so deferred events can be emitted in document order
- parser.attribList.push([parser.attribName, parser.attribValue])
- } else {
- // in non-xmlns mode, we can emit the event right away
- parser.tag.attributes[parser.attribName] = parser.attribValue
- emitNode( parser
- , "onattribute"
- , { name: parser.attribName
- , value: parser.attribValue } )
+ function end (parser) {
+ if (parser.sawRoot && !parser.closedRoot) strictFail(parser, 'Unclosed root tag')
+ if ((parser.state !== S.BEGIN) &&
+ (parser.state !== S.BEGIN_WHITESPACE) &&
+ (parser.state !== S.TEXT)) {
+ error(parser, 'Unexpected end')
+ }
+ closeText(parser)
+ parser.c = ''
+ parser.closed = true
+ emit(parser, 'onend')
+ SAXParser.call(parser, parser.strict, parser.opt)
+ return parser
}
- parser.attribName = parser.attribValue = ""
-}
+ function strictFail (parser, message) {
+ if (typeof parser !== 'object' || !(parser instanceof SAXParser)) {
+ throw new Error('bad call to strictFail')
+ }
+ if (parser.strict) {
+ error(parser, message)
+ }
+ }
-function openTag (parser, selfClosing) {
- if (parser.opt.xmlns) {
- // emit namespace binding events
- var tag = parser.tag
+ function newTag (parser) {
+ if (!parser.strict) parser.tagName = parser.tagName[parser.looseCase]()
+ var parent = parser.tags[parser.tags.length - 1] || parser
+ var tag = parser.tag = { name: parser.tagName, attributes: {} }
+
+ // will be overridden if tag contails an xmlns="foo" or xmlns:foo="bar"
+ if (parser.opt.xmlns) {
+ tag.ns = parent.ns
+ }
+ parser.attribList.length = 0
+ emitNode(parser, 'onopentagstart', tag)
+ }
- // add namespace info to tag
- var qn = qname(parser.tagName)
- tag.prefix = qn.prefix
- tag.local = qn.local
- tag.uri = tag.ns[qn.prefix] || ""
+ function qname (name, attribute) {
+ var i = name.indexOf(':')
+ var qualName = i < 0 ? [ '', name ] : name.split(':')
+ var prefix = qualName[0]
+ var local = qualName[1]
- if (tag.prefix && !tag.uri) {
- strictFail(parser, "Unbound namespace prefix: "
- + JSON.stringify(parser.tagName))
- tag.uri = qn.prefix
+ // <x "xmlns"="http://foo">
+ if (attribute && name === 'xmlns') {
+ prefix = 'xmlns'
+ local = ''
}
- var parent = parser.tags[parser.tags.length - 1] || parser
- if (tag.ns && parent.ns !== tag.ns) {
- Object.keys(tag.ns).forEach(function (p) {
- emitNode( parser
- , "onopennamespace"
- , { prefix: p , uri: tag.ns[p] } )
- })
+ return { prefix: prefix, local: local }
+ }
+
+ function attrib (parser) {
+ if (!parser.strict) {
+ parser.attribName = parser.attribName[parser.looseCase]()
}
- // handle deferred onattribute events
- // Note: do not apply default ns to attributes:
- // http://www.w3.org/TR/REC-xml-names/#defaulting
- for (var i = 0, l = parser.attribList.length; i < l; i ++) {
- var nv = parser.attribList[i]
- var name = nv[0]
- , value = nv[1]
- , qualName = qname(name, true)
- , prefix = qualName.prefix
- , local = qualName.local
- , uri = prefix == "" ? "" : (tag.ns[prefix] || "")
- , a = { name: name
- , value: value
- , prefix: prefix
- , local: local
- , uri: uri
- }
+ if (parser.attribList.indexOf(parser.attribName) !== -1 ||
+ parser.tag.attributes.hasOwnProperty(parser.attribName)) {
+ parser.attribName = parser.attribValue = ''
+ return
+ }
- // if there's any attributes with an undefined namespace,
- // then fail on them now.
- if (prefix && prefix != "xmlns" && !uri) {
- strictFail(parser, "Unbound namespace prefix: "
- + JSON.stringify(prefix))
- a.uri = prefix
+ if (parser.opt.xmlns) {
+ var qn = qname(parser.attribName, true)
+ var prefix = qn.prefix
+ var local = qn.local
+
+ if (prefix === 'xmlns') {
+ // namespace binding attribute. push the binding into scope
+ if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
+ strictFail(parser,
+ 'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
+ 'Actual: ' + parser.attribValue)
+ } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
+ strictFail(parser,
+ 'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
+ 'Actual: ' + parser.attribValue)
+ } else {
+ var tag = parser.tag
+ var parent = parser.tags[parser.tags.length - 1] || parser
+ if (tag.ns === parent.ns) {
+ tag.ns = Object.create(parent.ns)
+ }
+ tag.ns[local] = parser.attribValue
+ }
}
- parser.tag.attributes[name] = a
- emitNode(parser, "onattribute", a)
+
+ // defer onattribute events until all attributes have been seen
+ // so any new bindings can take effect. preserve attribute order
+ // so deferred events can be emitted in document order
+ parser.attribList.push([parser.attribName, parser.attribValue])
+ } else {
+ // in non-xmlns mode, we can emit the event right away
+ parser.tag.attributes[parser.attribName] = parser.attribValue
+ emitNode(parser, 'onattribute', {
+ name: parser.attribName,
+ value: parser.attribValue
+ })
}
- parser.attribList.length = 0
+
+ parser.attribName = parser.attribValue = ''
}
- parser.tag.isSelfClosing = !!selfClosing
+ function openTag (parser, selfClosing) {
+ if (parser.opt.xmlns) {
+ // emit namespace binding events
+ var tag = parser.tag
+
+ // add namespace info to tag
+ var qn = qname(parser.tagName)
+ tag.prefix = qn.prefix
+ tag.local = qn.local
+ tag.uri = tag.ns[qn.prefix] || ''
+
+ if (tag.prefix && !tag.uri) {
+ strictFail(parser, 'Unbound namespace prefix: ' +
+ JSON.stringify(parser.tagName))
+ tag.uri = qn.prefix
+ }
- // process the tag
- parser.sawRoot = true
- parser.tags.push(parser.tag)
- emitNode(parser, "onopentag", parser.tag)
- if (!selfClosing) {
- // special case for <script> in non-strict mode.
- if (!parser.noscript && parser.tagName.toLowerCase() === "script") {
- parser.state = S.SCRIPT
- } else {
- parser.state = S.TEXT
+ var parent = parser.tags[parser.tags.length - 1] || parser
+ if (tag.ns && parent.ns !== tag.ns) {
+ Object.keys(tag.ns).forEach(function (p) {
+ emitNode(parser, 'onopennamespace', {
+ prefix: p,
+ uri: tag.ns[p]
+ })
+ })
+ }
+
+ // handle deferred onattribute events
+ // Note: do not apply default ns to attributes:
+ // http://www.w3.org/TR/REC-xml-names/#defaulting
+ for (var i = 0, l = parser.attribList.length; i < l; i++) {
+ var nv = parser.attribList[i]
+ var name = nv[0]
+ var value = nv[1]
+ var qualName = qname(name, true)
+ var prefix = qualName.prefix
+ var local = qualName.local
+ var uri = prefix === '' ? '' : (tag.ns[prefix] || '')
+ var a = {
+ name: name,
+ value: value,
+ prefix: prefix,
+ local: local,
+ uri: uri
+ }
+
+ // if there's any attributes with an undefined namespace,
+ // then fail on them now.
+ if (prefix && prefix !== 'xmlns' && !uri) {
+ strictFail(parser, 'Unbound namespace prefix: ' +
+ JSON.stringify(prefix))
+ a.uri = prefix
+ }
+ parser.tag.attributes[name] = a
+ emitNode(parser, 'onattribute', a)
+ }
+ parser.attribList.length = 0
}
- parser.tag = null
- parser.tagName = ""
- }
- parser.attribName = parser.attribValue = ""
- parser.attribList.length = 0
-}
-
-function closeTag (parser) {
- if (!parser.tagName) {
- strictFail(parser, "Weird empty close tag.")
- parser.textNode += "</>"
- parser.state = S.TEXT
- return
+
+ parser.tag.isSelfClosing = !!selfClosing
+
+ // process the tag
+ parser.sawRoot = true
+ parser.tags.push(parser.tag)
+ emitNode(parser, 'onopentag', parser.tag)
+ if (!selfClosing) {
+ // special case for <script> in non-strict mode.
+ if (!parser.noscript && parser.tagName.toLowerCase() === 'script') {
+ parser.state = S.SCRIPT
+ } else {
+ parser.state = S.TEXT
+ }
+ parser.tag = null
+ parser.tagName = ''
+ }
+ parser.attribName = parser.attribValue = ''
+ parser.attribList.length = 0
}
- if (parser.script) {
- if (parser.tagName !== "script") {
- parser.script += "</" + parser.tagName + ">"
- parser.tagName = ""
- parser.state = S.SCRIPT
+ function closeTag (parser) {
+ if (!parser.tagName) {
+ strictFail(parser, 'Weird empty close tag.')
+ parser.textNode += '</>'
+ parser.state = S.TEXT
return
}
- emitNode(parser, "onscript", parser.script)
- parser.script = ""
- }
- // first make sure that the closing tag actually exists.
- // <a><b></c></b></a> will close everything, otherwise.
- var t = parser.tags.length
- var tagName = parser.tagName
- if (!parser.strict) tagName = tagName[parser.looseCase]()
- var closeTo = tagName
- while (t --) {
- var close = parser.tags[t]
- if (close.name !== closeTo) {
- // fail the first time in strict mode
- strictFail(parser, "Unexpected close tag")
- } else break
- }
+ if (parser.script) {
+ if (parser.tagName !== 'script') {
+ parser.script += '</' + parser.tagName + '>'
+ parser.tagName = ''
+ parser.state = S.SCRIPT
+ return
+ }
+ emitNode(parser, 'onscript', parser.script)
+ parser.script = ''
+ }
+
+ // first make sure that the closing tag actually exists.
+ // <a><b></c></b></a> will close everything, otherwise.
+ var t = parser.tags.length
+ var tagName = parser.tagName
+ if (!parser.strict) {
+ tagName = tagName[parser.looseCase]()
+ }
+ var closeTo = tagName
+ while (t--) {
+ var close = parser.tags[t]
+ if (close.name !== closeTo) {
+ // fail the first time in strict mode
+ strictFail(parser, 'Unexpected close tag')
+ } else {
+ break
+ }
+ }
- // didn't find it. we already failed for strict, so just abort.
- if (t < 0) {
- strictFail(parser, "Unmatched closing tag: "+parser.tagName)
- parser.textNode += "</" + parser.tagName + ">"
+ // didn't find it. we already failed for strict, so just abort.
+ if (t < 0) {
+ strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
+ parser.textNode += '</' + parser.tagName + '>'
+ parser.state = S.TEXT
+ return
+ }
+ parser.tagName = tagName
+ var s = parser.tags.length
+ while (s-- > t) {
+ var tag = parser.tag = parser.tags.pop()
+ parser.tagName = parser.tag.name
+ emitNode(parser, 'onclosetag', parser.tagName)
+
+ var x = {}
+ for (var i in tag.ns) {
+ x[i] = tag.ns[i]
+ }
+
+ var parent = parser.tags[parser.tags.length - 1] || parser
+ if (parser.opt.xmlns && tag.ns !== parent.ns) {
+ // remove namespace bindings introduced by tag
+ Object.keys(tag.ns).forEach(function (p) {
+ var n = tag.ns[p]
+ emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
+ })
+ }
+ }
+ if (t === 0) parser.closedRoot = true
+ parser.tagName = parser.attribValue = parser.attribName = ''
+ parser.attribList.length = 0
parser.state = S.TEXT
- return
}
- parser.tagName = tagName
- var s = parser.tags.length
- while (s --> t) {
- var tag = parser.tag = parser.tags.pop()
- parser.tagName = parser.tag.name
- emitNode(parser, "onclosetag", parser.tagName)
- var x = {}
- for (var i in tag.ns) x[i] = tag.ns[i]
+ function parseEntity (parser) {
+ var entity = parser.entity
+ var entityLC = entity.toLowerCase()
+ var num
+ var numStr = ''
- var parent = parser.tags[parser.tags.length - 1] || parser
- if (parser.opt.xmlns && tag.ns !== parent.ns) {
- // remove namespace bindings introduced by tag
- Object.keys(tag.ns).forEach(function (p) {
- var n = tag.ns[p]
- emitNode(parser, "onclosenamespace", { prefix: p, uri: n })
- })
+ if (parser.ENTITIES[entity]) {
+ return parser.ENTITIES[entity]
+ }
+ if (parser.ENTITIES[entityLC]) {
+ return parser.ENTITIES[entityLC]
+ }
+ entity = entityLC
+ if (entity.charAt(0) === '#') {
+ if (entity.charAt(1) === 'x') {
+ entity = entity.slice(2)
+ num = parseInt(entity, 16)
+ numStr = num.toString(16)
+ } else {
+ entity = entity.slice(1)
+ num = parseInt(entity, 10)
+ numStr = num.toString(10)
+ }
}
+ entity = entity.replace(/^0+/, '')
+ if (numStr.toLowerCase() !== entity) {
+ strictFail(parser, 'Invalid character entity')
+ return '&' + parser.entity + ';'
+ }
+
+ return String.fromCodePoint(num)
}
- if (t === 0) parser.closedRoot = true
- parser.tagName = parser.attribValue = parser.attribName = ""
- parser.attribList.length = 0
- parser.state = S.TEXT
-}
-
-function parseEntity (parser) {
- var entity = parser.entity
- , entityLC = entity.toLowerCase()
- , num
- , numStr = ""
- if (parser.ENTITIES[entity])
- return parser.ENTITIES[entity]
- if (parser.ENTITIES[entityLC])
- return parser.ENTITIES[entityLC]
- entity = entityLC
- if (entity.charAt(0) === "#") {
- if (entity.charAt(1) === "x") {
- entity = entity.slice(2)
- num = parseInt(entity, 16)
- numStr = num.toString(16)
- } else {
- entity = entity.slice(1)
- num = parseInt(entity, 10)
- numStr = num.toString(10)
+
+ function beginWhiteSpace (parser, c) {
+ if (c === '<') {
+ parser.state = S.OPEN_WAKA
+ parser.startTagPosition = parser.position
+ } else if (not(whitespace, c)) {
+ // have to process this as a text node.
+ // weird, but happens.
+ strictFail(parser, 'Non-whitespace before first tag.')
+ parser.textNode = c
+ parser.state = S.TEXT
}
}
- entity = entity.replace(/^0+/, "")
- if (numStr.toLowerCase() !== entity) {
- strictFail(parser, "Invalid character entity")
- return "&"+parser.entity + ";"
+
+ function charAt (chunk, i) {
+ var result = ''
+ if (i < chunk.length) {
+ result = chunk.charAt(i)
+ }
+ return result
}
- return String.fromCodePoint(num)
-}
-
-function write (chunk) {
- var parser = this
- if (this.error) throw this.error
- if (parser.closed) return error(parser,
- "Cannot write after close. Assign an onready handler.")
- if (chunk === null) return end(parser)
- var i = 0, c = ""
- while (parser.c = c = chunk.charAt(i++)) {
- if (parser.trackPosition) {
- parser.position ++
- if (c === "\n") {
- parser.line ++
- parser.column = 0
- } else parser.column ++
+ function write (chunk) {
+ var parser = this
+ if (this.error) {
+ throw this.error
+ }
+ if (parser.closed) {
+ return error(parser,
+ 'Cannot write after close. Assign an onready handler.')
+ }
+ if (chunk === null) {
+ return end(parser)
+ }
+ if (typeof chunk === 'object') {
+ chunk = chunk.toString()
}
- switch (parser.state) {
-
- case S.BEGIN:
- if (c === "<") {
- parser.state = S.OPEN_WAKA
- parser.startTagPosition = parser.position
- } else if (not(whitespace,c)) {
- // have to process this as a text node.
- // weird, but happens.
- strictFail(parser, "Non-whitespace before first tag.")
- parser.textNode = c
- parser.state = S.TEXT
+ var i = 0
+ var c = ''
+ while (true) {
+ c = charAt(chunk, i++)
+ parser.c = c
+ if (!c) {
+ break
+ }
+ if (parser.trackPosition) {
+ parser.position++
+ if (c === '\n') {
+ parser.line++
+ parser.column = 0
+ } else {
+ parser.column++
}
- continue
-
- case S.TEXT:
- if (parser.sawRoot && !parser.closedRoot) {
- var starti = i-1
- while (c && c!=="<" && c!=="&") {
- c = chunk.charAt(i++)
- if (c && parser.trackPosition) {
- parser.position ++
- if (c === "\n") {
- parser.line ++
- parser.column = 0
- } else parser.column ++
+ }
+ switch (parser.state) {
+ case S.BEGIN:
+ parser.state = S.BEGIN_WHITESPACE
+ if (c === '\uFEFF') {
+ continue
+ }
+ beginWhiteSpace(parser, c)
+ continue
+
+ case S.BEGIN_WHITESPACE:
+ beginWhiteSpace(parser, c)
+ continue
+
+ case S.TEXT:
+ if (parser.sawRoot && !parser.closedRoot) {
+ var starti = i - 1
+ while (c && c !== '<' && c !== '&') {
+ c = charAt(chunk, i++)
+ if (c && parser.trackPosition) {
+ parser.position++
+ if (c === '\n') {
+ parser.line++
+ parser.column = 0
+ } else {
+ parser.column++
+ }
+ }
}
+ parser.textNode += chunk.substring(starti, i - 1)
}
- parser.textNode += chunk.substring(starti, i-1)
- }
- if (c === "<") {
- parser.state = S.OPEN_WAKA
- parser.startTagPosition = parser.position
- } else {
- if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
- strictFail(parser, "Text data outside of root node.")
- if (c === "&") parser.state = S.TEXT_ENTITY
- else parser.textNode += c
- }
- continue
-
- case S.SCRIPT:
- // only non-strict
- if (c === "<") {
- parser.state = S.SCRIPT_ENDING
- } else parser.script += c
- continue
-
- case S.SCRIPT_ENDING:
- if (c === "/") {
- parser.state = S.CLOSE_TAG
- } else {
- parser.script += "<" + c
- parser.state = S.SCRIPT
- }
- continue
-
- case S.OPEN_WAKA:
- // either a /, ?, !, or text is coming next.
- if (c === "!") {
- parser.state = S.SGML_DECL
- parser.sgmlDecl = ""
- } else if (is(whitespace, c)) {
- // wait for it...
- } else if (is(nameStart,c)) {
- parser.state = S.OPEN_TAG
- parser.tagName = c
- } else if (c === "/") {
- parser.state = S.CLOSE_TAG
- parser.tagName = ""
- } else if (c === "?") {
- parser.state = S.PROC_INST
- parser.procInstName = parser.procInstBody = ""
- } else {
- strictFail(parser, "Unencoded <")
- // if there was some whitespace, then add that in.
- if (parser.startTagPosition + 1 < parser.position) {
- var pad = parser.position - parser.startTagPosition
- c = new Array(pad).join(" ") + c
+ if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
+ parser.state = S.OPEN_WAKA
+ parser.startTagPosition = parser.position
+ } else {
+ if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
+ strictFail(parser, 'Text data outside of root node.')
+ }
+ if (c === '&') {
+ parser.state = S.TEXT_ENTITY
+ } else {
+ parser.textNode += c
+ }
+ }
+ continue
+
+ case S.SCRIPT:
+ // only non-strict
+ if (c === '<') {
+ parser.state = S.SCRIPT_ENDING
+ } else {
+ parser.script += c
+ }
+ continue
+
+ case S.SCRIPT_ENDING:
+ if (c === '/') {
+ parser.state = S.CLOSE_TAG
+ } else {
+ parser.script += '<' + c
+ parser.state = S.SCRIPT
+ }
+ continue
+
+ case S.OPEN_WAKA:
+ // either a /, ?, !, or text is coming next.
+ if (c === '!') {
+ parser.state = S.SGML_DECL
+ parser.sgmlDecl = ''
+ } else if (is(whitespace, c)) {
+ // wait for it...
+ } else if (is(nameStart, c)) {
+ parser.state = S.OPEN_TAG
+ parser.tagName = c
+ } else if (c === '/') {
+ parser.state = S.CLOSE_TAG
+ parser.tagName = ''
+ } else if (c === '?') {
+ parser.state = S.PROC_INST
+ parser.procInstName = parser.procInstBody = ''
+ } else {
+ strictFail(parser, 'Unencoded <')
+ // if there was some whitespace, then add that in.
+ if (parser.startTagPosition + 1 < parser.position) {
+ var pad = parser.position - parser.startTagPosition
+ c = new Array(pad).join(' ') + c
+ }
+ parser.textNode += '<' + c
+ parser.state = S.TEXT
+ }
+ continue
+
+ case S.SGML_DECL:
+ if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
+ emitNode(parser, 'onopencdata')
+ parser.state = S.CDATA
+ parser.sgmlDecl = ''
+ parser.cdata = ''
+ } else if (parser.sgmlDecl + c === '--') {
+ parser.state = S.COMMENT
+ parser.comment = ''
+ parser.sgmlDecl = ''
+ } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
+ parser.state = S.DOCTYPE
+ if (parser.doctype || parser.sawRoot) {
+ strictFail(parser,
+ 'Inappropriately located doctype declaration')
+ }
+ parser.doctype = ''
+ parser.sgmlDecl = ''
+ } else if (c === '>') {
+ emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
+ parser.sgmlDecl = ''
+ parser.state = S.TEXT
+ } else if (is(quote, c)) {
+ parser.state = S.SGML_DECL_QUOTED
+ parser.sgmlDecl += c
+ } else {
+ parser.sgmlDecl += c
+ }
+ continue
+
+ case S.SGML_DECL_QUOTED:
+ if (c === parser.q) {
+ parser.state = S.SGML_DECL
+ parser.q = ''
}
- parser.textNode += "<" + c
- parser.state = S.TEXT
- }
- continue
-
- case S.SGML_DECL:
- if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
- emitNode(parser, "onopencdata")
- parser.state = S.CDATA
- parser.sgmlDecl = ""
- parser.cdata = ""
- } else if (parser.sgmlDecl+c === "--") {
- parser.state = S.COMMENT
- parser.comment = ""
- parser.sgmlDecl = ""
- } else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
- parser.state = S.DOCTYPE
- if (parser.doctype || parser.sawRoot) strictFail(parser,
- "Inappropriately located doctype declaration")
- parser.doctype = ""
- parser.sgmlDecl = ""
- } else if (c === ">") {
- emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
- parser.sgmlDecl = ""
- parser.state = S.TEXT
- } else if (is(quote, c)) {
- parser.state = S.SGML_DECL_QUOTED
parser.sgmlDecl += c
- } else parser.sgmlDecl += c
- continue
+ continue
- case S.SGML_DECL_QUOTED:
- if (c === parser.q) {
- parser.state = S.SGML_DECL
- parser.q = ""
- }
- parser.sgmlDecl += c
- continue
-
- case S.DOCTYPE:
- if (c === ">") {
- parser.state = S.TEXT
- emitNode(parser, "ondoctype", parser.doctype)
- parser.doctype = true // just remember that we saw it.
- } else {
+ case S.DOCTYPE:
+ if (c === '>') {
+ parser.state = S.TEXT
+ emitNode(parser, 'ondoctype', parser.doctype)
+ parser.doctype = true // just remember that we saw it.
+ } else {
+ parser.doctype += c
+ if (c === '[') {
+ parser.state = S.DOCTYPE_DTD
+ } else if (is(quote, c)) {
+ parser.state = S.DOCTYPE_QUOTED
+ parser.q = c
+ }
+ }
+ continue
+
+ case S.DOCTYPE_QUOTED:
parser.doctype += c
- if (c === "[") parser.state = S.DOCTYPE_DTD
- else if (is(quote, c)) {
- parser.state = S.DOCTYPE_QUOTED
+ if (c === parser.q) {
+ parser.q = ''
+ parser.state = S.DOCTYPE
+ }
+ continue
+
+ case S.DOCTYPE_DTD:
+ parser.doctype += c
+ if (c === ']') {
+ parser.state = S.DOCTYPE
+ } else if (is(quote, c)) {
+ parser.state = S.DOCTYPE_DTD_QUOTED
parser.q = c
}
- }
- continue
+ continue
- case S.DOCTYPE_QUOTED:
- parser.doctype += c
- if (c === parser.q) {
- parser.q = ""
- parser.state = S.DOCTYPE
- }
- continue
-
- case S.DOCTYPE_DTD:
- parser.doctype += c
- if (c === "]") parser.state = S.DOCTYPE
- else if (is(quote,c)) {
- parser.state = S.DOCTYPE_DTD_QUOTED
- parser.q = c
- }
- continue
+ case S.DOCTYPE_DTD_QUOTED:
+ parser.doctype += c
+ if (c === parser.q) {
+ parser.state = S.DOCTYPE_DTD
+ parser.q = ''
+ }
+ continue
- case S.DOCTYPE_DTD_QUOTED:
- parser.doctype += c
- if (c === parser.q) {
- parser.state = S.DOCTYPE_DTD
- parser.q = ""
- }
- continue
-
- case S.COMMENT:
- if (c === "-") parser.state = S.COMMENT_ENDING
- else parser.comment += c
- continue
-
- case S.COMMENT_ENDING:
- if (c === "-") {
- parser.state = S.COMMENT_ENDED
- parser.comment = textopts(parser.opt, parser.comment)
- if (parser.comment) emitNode(parser, "oncomment", parser.comment)
- parser.comment = ""
- } else {
- parser.comment += "-" + c
- parser.state = S.COMMENT
- }
- continue
-
- case S.COMMENT_ENDED:
- if (c !== ">") {
- strictFail(parser, "Malformed comment")
- // allow <!-- blah -- bloo --> in non-strict mode,
- // which is a comment of " blah -- bloo "
- parser.comment += "--" + c
- parser.state = S.COMMENT
- } else parser.state = S.TEXT
- continue
-
- case S.CDATA:
- if (c === "]") parser.state = S.CDATA_ENDING
- else parser.cdata += c
- continue
-
- case S.CDATA_ENDING:
- if (c === "]") parser.state = S.CDATA_ENDING_2
- else {
- parser.cdata += "]" + c
- parser.state = S.CDATA
- }
- continue
-
- case S.CDATA_ENDING_2:
- if (c === ">") {
- if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
- emitNode(parser, "onclosecdata")
- parser.cdata = ""
- parser.state = S.TEXT
- } else if (c === "]") {
- parser.cdata += "]"
- } else {
- parser.cdata += "]]" + c
- parser.state = S.CDATA
- }
- continue
-
- case S.PROC_INST:
- if (c === "?") parser.state = S.PROC_INST_ENDING
- else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
- else parser.procInstName += c
- continue
-
- case S.PROC_INST_BODY:
- if (!parser.procInstBody && is(whitespace, c)) continue
- else if (c === "?") parser.state = S.PROC_INST_ENDING
- else parser.procInstBody += c
- continue
-
- case S.PROC_INST_ENDING:
- if (c === ">") {
- emitNode(parser, "onprocessinginstruction", {
- name : parser.procInstName,
- body : parser.procInstBody
- })
- parser.procInstName = parser.procInstBody = ""
- parser.state = S.TEXT
- } else {
- parser.procInstBody += "?" + c
- parser.state = S.PROC_INST_BODY
- }
- continue
-
- case S.OPEN_TAG:
- if (is(nameBody, c)) parser.tagName += c
- else {
- newTag(parser)
- if (c === ">") openTag(parser)
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
- else {
- if (not(whitespace, c)) strictFail(
- parser, "Invalid character in tag name")
+ case S.COMMENT:
+ if (c === '-') {
+ parser.state = S.COMMENT_ENDING
+ } else {
+ parser.comment += c
+ }
+ continue
+
+ case S.COMMENT_ENDING:
+ if (c === '-') {
+ parser.state = S.COMMENT_ENDED
+ parser.comment = textopts(parser.opt, parser.comment)
+ if (parser.comment) {
+ emitNode(parser, 'oncomment', parser.comment)
+ }
+ parser.comment = ''
+ } else {
+ parser.comment += '-' + c
+ parser.state = S.COMMENT
+ }
+ continue
+
+ case S.COMMENT_ENDED:
+ if (c !== '>') {
+ strictFail(parser, 'Malformed comment')
+ // allow <!-- blah -- bloo --> in non-strict mode,
+ // which is a comment of " blah -- bloo "
+ parser.comment += '--' + c
+ parser.state = S.COMMENT
+ } else {
+ parser.state = S.TEXT
+ }
+ continue
+
+ case S.CDATA:
+ if (c === ']') {
+ parser.state = S.CDATA_ENDING
+ } else {
+ parser.cdata += c
+ }
+ continue
+
+ case S.CDATA_ENDING:
+ if (c === ']') {
+ parser.state = S.CDATA_ENDING_2
+ } else {
+ parser.cdata += ']' + c
+ parser.state = S.CDATA
+ }
+ continue
+
+ case S.CDATA_ENDING_2:
+ if (c === '>') {
+ if (parser.cdata) {
+ emitNode(parser, 'oncdata', parser.cdata)
+ }
+ emitNode(parser, 'onclosecdata')
+ parser.cdata = ''
+ parser.state = S.TEXT
+ } else if (c === ']') {
+ parser.cdata += ']'
+ } else {
+ parser.cdata += ']]' + c
+ parser.state = S.CDATA
+ }
+ continue
+
+ case S.PROC_INST:
+ if (c === '?') {
+ parser.state = S.PROC_INST_ENDING
+ } else if (is(whitespace, c)) {
+ parser.state = S.PROC_INST_BODY
+ } else {
+ parser.procInstName += c
+ }
+ continue
+
+ case S.PROC_INST_BODY:
+ if (!parser.procInstBody && is(whitespace, c)) {
+ continue
+ } else if (c === '?') {
+ parser.state = S.PROC_INST_ENDING
+ } else {
+ parser.procInstBody += c
+ }
+ continue
+
+ case S.PROC_INST_ENDING:
+ if (c === '>') {
+ emitNode(parser, 'onprocessinginstruction', {
+ name: parser.procInstName,
+ body: parser.procInstBody
+ })
+ parser.procInstName = parser.procInstBody = ''
+ parser.state = S.TEXT
+ } else {
+ parser.procInstBody += '?' + c
+ parser.state = S.PROC_INST_BODY
+ }
+ continue
+
+ case S.OPEN_TAG:
+ if (is(nameBody, c)) {
+ parser.tagName += c
+ } else {
+ newTag(parser)
+ if (c === '>') {
+ openTag(parser)
+ } else if (c === '/') {
+ parser.state = S.OPEN_TAG_SLASH
+ } else {
+ if (not(whitespace, c)) {
+ strictFail(parser, 'Invalid character in tag name')
+ }
+ parser.state = S.ATTRIB
+ }
+ }
+ continue
+
+ case S.OPEN_TAG_SLASH:
+ if (c === '>') {
+ openTag(parser, true)
+ closeTag(parser)
+ } else {
+ strictFail(parser, 'Forward-slash in opening tag not followed by >')
parser.state = S.ATTRIB
}
- }
- continue
+ continue
- case S.OPEN_TAG_SLASH:
- if (c === ">") {
- openTag(parser, true)
- closeTag(parser)
- } else {
- strictFail(parser, "Forward-slash in opening tag not followed by >")
- parser.state = S.ATTRIB
- }
- continue
-
- case S.ATTRIB:
- // haven't read the attribute name yet.
- if (is(whitespace, c)) continue
- else if (c === ">") openTag(parser)
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
- else if (is(nameStart, c)) {
- parser.attribName = c
- parser.attribValue = ""
- parser.state = S.ATTRIB_NAME
- } else strictFail(parser, "Invalid attribute name")
- continue
-
- case S.ATTRIB_NAME:
- if (c === "=") parser.state = S.ATTRIB_VALUE
- else if (c === ">") {
- strictFail(parser, "Attribute without value")
- parser.attribValue = parser.attribName
- attrib(parser)
- openTag(parser)
- }
- else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
- else if (is(nameBody, c)) parser.attribName += c
- else strictFail(parser, "Invalid attribute name")
- continue
-
- case S.ATTRIB_NAME_SAW_WHITE:
- if (c === "=") parser.state = S.ATTRIB_VALUE
- else if (is(whitespace, c)) continue
- else {
- strictFail(parser, "Attribute without value")
- parser.tag.attributes[parser.attribName] = ""
- parser.attribValue = ""
- emitNode(parser, "onattribute",
- { name : parser.attribName, value : "" })
- parser.attribName = ""
- if (c === ">") openTag(parser)
- else if (is(nameStart, c)) {
+ case S.ATTRIB:
+ // haven't read the attribute name yet.
+ if (is(whitespace, c)) {
+ continue
+ } else if (c === '>') {
+ openTag(parser)
+ } else if (c === '/') {
+ parser.state = S.OPEN_TAG_SLASH
+ } else if (is(nameStart, c)) {
parser.attribName = c
+ parser.attribValue = ''
parser.state = S.ATTRIB_NAME
} else {
- strictFail(parser, "Invalid attribute name")
- parser.state = S.ATTRIB
+ strictFail(parser, 'Invalid attribute name')
}
- }
- continue
+ continue
- case S.ATTRIB_VALUE:
- if (is(whitespace, c)) continue
- else if (is(quote, c)) {
- parser.q = c
- parser.state = S.ATTRIB_VALUE_QUOTED
- } else {
- strictFail(parser, "Unquoted attribute value")
- parser.state = S.ATTRIB_VALUE_UNQUOTED
- parser.attribValue = c
- }
- continue
+ case S.ATTRIB_NAME:
+ if (c === '=') {
+ parser.state = S.ATTRIB_VALUE
+ } else if (c === '>') {
+ strictFail(parser, 'Attribute without value')
+ parser.attribValue = parser.attribName
+ attrib(parser)
+ openTag(parser)
+ } else if (is(whitespace, c)) {
+ parser.state = S.ATTRIB_NAME_SAW_WHITE
+ } else if (is(nameBody, c)) {
+ parser.attribName += c
+ } else {
+ strictFail(parser, 'Invalid attribute name')
+ }
+ continue
- case S.ATTRIB_VALUE_QUOTED:
- if (c !== parser.q) {
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
- else parser.attribValue += c
+ case S.ATTRIB_NAME_SAW_WHITE:
+ if (c === '=') {
+ parser.state = S.ATTRIB_VALUE
+ } else if (is(whitespace, c)) {
+ continue
+ } else {
+ strictFail(parser, 'Attribute without value')
+ parser.tag.attributes[parser.attribName] = ''
+ parser.attribValue = ''
+ emitNode(parser, 'onattribute', {
+ name: parser.attribName,
+ value: ''
+ })
+ parser.attribName = ''
+ if (c === '>') {
+ openTag(parser)
+ } else if (is(nameStart, c)) {
+ parser.attribName = c
+ parser.state = S.ATTRIB_NAME
+ } else {
+ strictFail(parser, 'Invalid attribute name')
+ parser.state = S.ATTRIB
+ }
+ }
continue
- }
- attrib(parser)
- parser.q = ""
- parser.state = S.ATTRIB_VALUE_CLOSED
- continue
-
- case S.ATTRIB_VALUE_CLOSED:
- if (is(whitespace, c)) {
- parser.state = S.ATTRIB
- } else if (c === ">") openTag(parser)
- else if (c === "/") parser.state = S.OPEN_TAG_SLASH
- else if (is(nameStart, c)) {
- strictFail(parser, "No whitespace between attributes")
- parser.attribName = c
- parser.attribValue = ""
- parser.state = S.ATTRIB_NAME
- } else strictFail(parser, "Invalid attribute name")
- continue
-
- case S.ATTRIB_VALUE_UNQUOTED:
- if (not(attribEnd,c)) {
- if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
- else parser.attribValue += c
+
+ case S.ATTRIB_VALUE:
+ if (is(whitespace, c)) {
+ continue
+ } else if (is(quote, c)) {
+ parser.q = c
+ parser.state = S.ATTRIB_VALUE_QUOTED
+ } else {
+ strictFail(parser, 'Unquoted attribute value')
+ parser.state = S.ATTRIB_VALUE_UNQUOTED
+ parser.attribValue = c
+ }
continue
- }
- attrib(parser)
- if (c === ">") openTag(parser)
- else parser.state = S.ATTRIB
- continue
-
- case S.CLOSE_TAG:
- if (!parser.tagName) {
- if (is(whitespace, c)) continue
- else if (not(nameStart, c)) {
- if (parser.script) {
- parser.script += "</" + c
- parser.state = S.SCRIPT
+
+ case S.ATTRIB_VALUE_QUOTED:
+ if (c !== parser.q) {
+ if (c === '&') {
+ parser.state = S.ATTRIB_VALUE_ENTITY_Q
} else {
- strictFail(parser, "Invalid tagname in closing tag.")
+ parser.attribValue += c
}
- } else parser.tagName = c
- }
- else if (c === ">") closeTag(parser)
- else if (is(nameBody, c)) parser.tagName += c
- else if (parser.script) {
- parser.script += "</" + parser.tagName
- parser.tagName = ""
- parser.state = S.SCRIPT
- } else {
- if (not(whitespace, c)) strictFail(parser,
- "Invalid tagname in closing tag")
- parser.state = S.CLOSE_TAG_SAW_WHITE
- }
- continue
-
- case S.CLOSE_TAG_SAW_WHITE:
- if (is(whitespace, c)) continue
- if (c === ">") closeTag(parser)
- else strictFail(parser, "Invalid characters in closing tag")
- continue
-
- case S.TEXT_ENTITY:
- case S.ATTRIB_VALUE_ENTITY_Q:
- case S.ATTRIB_VALUE_ENTITY_U:
- switch(parser.state) {
- case S.TEXT_ENTITY:
- var returnState = S.TEXT, buffer = "textNode"
- break
-
- case S.ATTRIB_VALUE_ENTITY_Q:
- var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
- break
-
- case S.ATTRIB_VALUE_ENTITY_U:
- var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
- break
- }
- if (c === ";") {
- parser[buffer] += parseEntity(parser)
- parser.entity = ""
- parser.state = returnState
- }
- else if (is(entity, c)) parser.entity += c
- else {
- strictFail(parser, "Invalid character entity")
- parser[buffer] += "&" + parser.entity + c
- parser.entity = ""
- parser.state = returnState
- }
- continue
+ continue
+ }
+ attrib(parser)
+ parser.q = ''
+ parser.state = S.ATTRIB_VALUE_CLOSED
+ continue
+
+ case S.ATTRIB_VALUE_CLOSED:
+ if (is(whitespace, c)) {
+ parser.state = S.ATTRIB
+ } else if (c === '>') {
+ openTag(parser)
+ } else if (c === '/') {
+ parser.state = S.OPEN_TAG_SLASH
+ } else if (is(nameStart, c)) {
+ strictFail(parser, 'No whitespace between attributes')
+ parser.attribName = c
+ parser.attribValue = ''
+ parser.state = S.ATTRIB_NAME
+ } else {
+ strictFail(parser, 'Invalid attribute name')
+ }
+ continue
- default:
- throw new Error(parser, "Unknown state: " + parser.state)
+ case S.ATTRIB_VALUE_UNQUOTED:
+ if (not(attribEnd, c)) {
+ if (c === '&') {
+ parser.state = S.ATTRIB_VALUE_ENTITY_U
+ } else {
+ parser.attribValue += c
+ }
+ continue
+ }
+ attrib(parser)
+ if (c === '>') {
+ openTag(parser)
+ } else {
+ parser.state = S.ATTRIB
+ }
+ continue
+
+ case S.CLOSE_TAG:
+ if (!parser.tagName) {
+ if (is(whitespace, c)) {
+ continue
+ } else if (not(nameStart, c)) {
+ if (parser.script) {
+ parser.script += '</' + c
+ parser.state = S.SCRIPT
+ } else {
+ strictFail(parser, 'Invalid tagname in closing tag.')
+ }
+ } else {
+ parser.tagName = c
+ }
+ } else if (c === '>') {
+ closeTag(parser)
+ } else if (is(nameBody, c)) {
+ parser.tagName += c
+ } else if (parser.script) {
+ parser.script += '</' + parser.tagName
+ parser.tagName = ''
+ parser.state = S.SCRIPT
+ } else {
+ if (not(whitespace, c)) {
+ strictFail(parser, 'Invalid tagname in closing tag')
+ }
+ parser.state = S.CLOSE_TAG_SAW_WHITE
+ }
+ continue
+
+ case S.CLOSE_TAG_SAW_WHITE:
+ if (is(whitespace, c)) {
+ continue
+ }
+ if (c === '>') {
+ closeTag(parser)
+ } else {
+ strictFail(parser, 'Invalid characters in closing tag')
+ }
+ continue
+
+ case S.TEXT_ENTITY:
+ case S.ATTRIB_VALUE_ENTITY_Q:
+ case S.ATTRIB_VALUE_ENTITY_U:
+ var returnState
+ var buffer
+ switch (parser.state) {
+ case S.TEXT_ENTITY:
+ returnState = S.TEXT
+ buffer = 'textNode'
+ break
+
+ case S.ATTRIB_VALUE_ENTITY_Q:
+ returnState = S.ATTRIB_VALUE_QUOTED
+ buffer = 'attribValue'
+ break
+
+ case S.ATTRIB_VALUE_ENTITY_U:
+ returnState = S.ATTRIB_VALUE_UNQUOTED
+ buffer = 'attribValue'
+ break
+ }
+
+ if (c === ';') {
+ parser[buffer] += parseEntity(parser)
+ parser.entity = ''
+ parser.state = returnState
+ } else if (is(parser.entity.length ? entityBody : entityStart, c)) {
+ parser.entity += c
+ } else {
+ strictFail(parser, 'Invalid character in entity name')
+ parser[buffer] += '&' + parser.entity + c
+ parser.entity = ''
+ parser.state = returnState
+ }
+
+ continue
+
+ default:
+ throw new Error(parser, 'Unknown state: ' + parser.state)
+ }
+ } // while
+
+ if (parser.position >= parser.bufferCheckPosition) {
+ checkBufferLength(parser)
}
- } // while
- // cdata blocks can get very big under normal conditions. emit and move on.
- // if (parser.state === S.CDATA && parser.cdata) {
- // emitNode(parser, "oncdata", parser.cdata)
- // parser.cdata = ""
- // }
- if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
- return parser
-}
-
-/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
-if (!String.fromCodePoint) {
- (function() {
- var stringFromCharCode = String.fromCharCode;
- var floor = Math.floor;
- var fromCodePoint = function() {
- var MAX_SIZE = 0x4000;
- var codeUnits = [];
- var highSurrogate;
- var lowSurrogate;
- var index = -1;
- var length = arguments.length;
- if (!length) {
- return '';
- }
- var result = '';
- while (++index < length) {
- var codePoint = Number(arguments[index]);
- if (
- !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
- codePoint < 0 || // not a valid Unicode code point
- codePoint > 0x10FFFF || // not a valid Unicode code point
- floor(codePoint) != codePoint // not an integer
- ) {
- throw RangeError('Invalid code point: ' + codePoint);
- }
- if (codePoint <= 0xFFFF) { // BMP code point
- codeUnits.push(codePoint);
- } else { // Astral code point; split in surrogate halves
- // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
- codePoint -= 0x10000;
- highSurrogate = (codePoint >> 10) + 0xD800;
- lowSurrogate = (codePoint % 0x400) + 0xDC00;
- codeUnits.push(highSurrogate, lowSurrogate);
- }
- if (index + 1 == length || codeUnits.length > MAX_SIZE) {
- result += stringFromCharCode.apply(null, codeUnits);
- codeUnits.length = 0;
- }
- }
- return result;
- };
- if (Object.defineProperty) {
- Object.defineProperty(String, 'fromCodePoint', {
- 'value': fromCodePoint,
- 'configurable': true,
- 'writable': true
- });
- } else {
- String.fromCodePoint = fromCodePoint;
- }
- }());
-}
+ return parser
+ }
-})(typeof exports === "undefined" ? sax = {} : exports);
+ /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
+ if (!String.fromCodePoint) {
+ (function () {
+ var stringFromCharCode = String.fromCharCode
+ var floor = Math.floor
+ var fromCodePoint = function () {
+ var MAX_SIZE = 0x4000
+ var codeUnits = []
+ var highSurrogate
+ var lowSurrogate
+ var index = -1
+ var length = arguments.length
+ if (!length) {
+ return ''
+ }
+ var result = ''
+ while (++index < length) {
+ var codePoint = Number(arguments[index])
+ if (
+ !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
+ codePoint < 0 || // not a valid Unicode code point
+ codePoint > 0x10FFFF || // not a valid Unicode code point
+ floor(codePoint) !== codePoint // not an integer
+ ) {
+ throw RangeError('Invalid code point: ' + codePoint)
+ }
+ if (codePoint <= 0xFFFF) { // BMP code point
+ codeUnits.push(codePoint)
+ } else { // Astral code point; split in surrogate halves
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+ codePoint -= 0x10000
+ highSurrogate = (codePoint >> 10) + 0xD800
+ lowSurrogate = (codePoint % 0x400) + 0xDC00
+ codeUnits.push(highSurrogate, lowSurrogate)
+ }
+ if (index + 1 === length || codeUnits.length > MAX_SIZE) {
+ result += stringFromCharCode.apply(null, codeUnits)
+ codeUnits.length = 0
+ }
+ }
+ return result
+ }
+ if (Object.defineProperty) {
+ Object.defineProperty(String, 'fromCodePoint', {
+ value: fromCodePoint,
+ configurable: true,
+ writable: true
+ })
+ } else {
+ String.fromCodePoint = fromCodePoint
+ }
+ }())
+ }
+})(typeof exports === 'undefined' ? this.sax = {} : exports)