1411 lines
39 KiB
JavaScript
1411 lines
39 KiB
JavaScript
// wrapper for non-node envs
|
|
;(function (sax) {
|
|
|
|
// Public entry points.
sax.parser = function (strict, opt) {
  return new SAXParser(strict, opt);
};
sax.SAXParser = SAXParser;
sax.SAXStream = SAXStream;
sax.createStream = createStream;

// Buffer overrun checks start once the parser position passes
// MAX_BUFFER_LENGTH. Each check schedules the next one at
// MAX_BUFFER_LENGTH - (length of the longest buffer), which is the earliest
// point at which an overrun could happen, so checks are as rare as possible
// while still guaranteeing the bound. Buffers are tested at most once per
// write(), so a very large string passed to write() can exceed the limit;
// in the worst case one complete copy of that string is created. Callers
// control chunk size, so this is considered acceptable.
// Set to Infinity to have unlimited buffers.
sax.MAX_BUFFER_LENGTH = 64 * 1024;

// Names of the parser properties that accumulate characters.
var buffers = [
  "comment",
  "sgmlDecl",
  "textNode",
  "tagName",
  "doctype",
  "procInstName",
  "procInstBody",
  "entity",
  "attribName",
  "attribValue",
  "cdata",
  "script"
];

// Every event name the parser can emit (exposed for discoverability).
sax.EVENTS = [
  "text",
  "processinginstruction",
  "sgmldeclaration",
  "doctype",
  "comment",
  "attribute",
  "opentag",
  "closetag",
  "opencdata",
  "cdata",
  "closecdata",
  "error",
  "end",
  "ready",
  "script",
  "opennamespace",
  "closenamespace"
];
|
|
|
|
// Parser constructor; also works when called without `new`.
// Resets every buffer and state flag, stores the options object (and writes
// the resolved `lowercase` flag back onto it), then fires "onready".
function SAXParser (strict, opt) {
  if (!(this instanceof SAXParser)) {
    return new SAXParser(strict, opt);
  }

  var self = this;
  var options = opt || {};

  clearBuffers(self);
  self.c = "";
  self.q = "";
  self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH;

  self.opt = options;
  options.lowercase = options.lowercase || options.lowercasetags;
  self.looseCase = options.lowercase ? "toLowerCase" : "toUpperCase";

  self.tags = [];
  self.sawRoot = false;
  self.closedRoot = false;
  self.closed = false;
  self.error = null;
  self.tag = null;
  self.strict = !!strict;
  self.noscript = !!(strict || options.noscript);
  self.state = S.BEGIN;
  self.ENTITIES = Object.create(sax.ENTITIES);
  self.attribList = [];

  // Namespaces form a prototype chain: `ns` always belongs to the current
  // tag and inherits from the parent tag's bindings.
  if (options.xmlns) {
    self.ns = Object.create(rootNS);
  }

  // Position tracking is mostly used for error reporting.
  self.trackPosition = options.position !== false;
  if (self.trackPosition) {
    self.column = 0;
    self.line = 0;
    self.position = 0;
  }

  emit(self, "onready");
}
|
|
|
|
// Minimal fallbacks for engines that lack these ES5 built-ins. Each one is
// installed only when the native function is missing.

// Single-argument Object.create: the property-descriptor argument is ignored.
if (!Object.create) {
  Object.create = function (o) {
    function Shim () {
      this.__proto__ = o;
    }
    Shim.prototype = o;
    return new Shim;
  };
}

if (!Object.getPrototypeOf) {
  Object.getPrototypeOf = function (o) {
    return o.__proto__;
  };
}

// Own enumerable keys only.
if (!Object.keys) {
  Object.keys = function (o) {
    var own = [];
    for (var key in o) {
      if (o.hasOwnProperty(key)) {
        own.push(key);
      }
    }
    return own;
  };
}
|
|
|
|
// Enforces sax.MAX_BUFFER_LENGTH on every accumulating buffer.
//
// Text, cdata and script buffers can legitimately grow large, so they are
// flushed (emitted and emptied) when they exceed the limit; any other buffer
// exceeding the limit is reported through error(). Afterwards the next check
// is scheduled for the earliest position at which an overrun could occur.
//
// Fix: the parser replaces `parser.doctype` with `true` once a doctype has
// been seen, so that slot is not always a string. Reading `.length` from it
// produced `undefined`, which turned the scheduled position into NaN and
// silently disabled every later buffer check. Non-string slots now count as
// empty.
function checkBufferLength (parser) {
  var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10);
  var maxActual = 0;

  for (var i = 0, l = buffers.length; i < l; i++) {
    var name = buffers[i];
    var value = parser[name];
    var len = typeof value === "string" ? value.length : 0;

    if (len > maxAllowed) {
      switch (name) {
        case "textNode":
          closeText(parser);
          break;

        case "cdata":
          emitNode(parser, "oncdata", parser.cdata);
          parser.cdata = "";
          break;

        case "script":
          emitNode(parser, "onscript", parser.script);
          parser.script = "";
          break;

        default:
          error(parser, "Max buffer length exceeded: " + name);
      }
    }

    // Uses the pre-flush length on purpose: this only makes the next check
    // happen sooner, never later.
    maxActual = Math.max(maxActual, len);
  }

  // Schedule the next check for the earliest possible buffer overrun.
  parser.bufferCheckPosition = (sax.MAX_BUFFER_LENGTH - maxActual) +
    parser.position;
}
|
|
|
|
// Resets every accumulating buffer on the parser to the empty string.
function clearBuffers (parser) {
  buffers.forEach(function (name) {
    parser[name] = "";
  });
}
|
|
|
|
// Emits whatever text, cdata and script content is currently buffered and
// empties those buffers. Text is always passed through closeText(); cdata and
// script are emitted only when non-empty.
function flushBuffers (parser) {
  closeText(parser);

  var flushable = [
    ["cdata", "oncdata"],
    ["script", "onscript"]
  ];
  for (var i = 0; i < flushable.length; i += 1) {
    var field = flushable[i][0];
    if (parser[field] !== "") {
      emitNode(parser, flushable[i][1], parser[field]);
      parser[field] = "";
    }
  }
}
|
|
|
|
// Instance API of the parser.
SAXParser.prototype = {
  // Finish parsing; delegates to the module-level end().
  end: function () {
    end(this);
  },

  write: write,

  // Clear a pending error so that writing can continue.
  resume: function () {
    this.error = null;
    return this;
  },

  // Writing null is the parser's end-of-input signal.
  close: function () {
    return this.write(null);
  },

  // Emit any buffered text/cdata/script immediately.
  flush: function () {
    flushBuffers(this);
  }
};
|
|
|
|
// Use the host's Stream base class when `require("stream")` works; otherwise
// fall back to an empty constructor so the rest of the module still loads.
var Stream;
try {
  Stream = require("stream").Stream;
} catch (ex) {
  Stream = function () {};
}
|
|
|
|
|
|
// Parser events that SAXStream forwards generically. "error" and "end" are
// excluded because SAXStream wires those two up itself.
var streamWraps = sax.EVENTS.filter(function (ev) {
  return !(ev === "error" || ev === "end");
});
|
|
|
|
// Factory wrapper around the SAXStream constructor.
function createStream (strict, opt) {
  var stream = new SAXStream(strict, opt);
  return stream;
}
|
|
|
|
// Stream wrapper around a SAXParser; also works when called without `new`.
//
// The parser's "end" and "error" callbacks are forwarded as stream events.
// For every other parser event an `on<event>` accessor is defined on the
// stream: reading it returns the parser's handler, assigning a function
// subscribes it via on(), and assigning a falsy value removes all listeners
// and clears the parser's handler.
function SAXStream (strict, opt) {
  if (!(this instanceof SAXStream)) {
    return new SAXStream(strict, opt);
  }

  Stream.apply(this);

  var stream = this;
  stream._parser = new SAXParser(strict, opt);
  stream.writable = true;
  stream.readable = true;

  stream._parser.onend = function () {
    stream.emit("end");
  };

  stream._parser.onerror = function (er) {
    stream.emit("error", er);

    // If emit() did not throw, the error was handled: clear it so that
    // writing can continue.
    stream._parser.error = null;
  };

  stream._decoder = null;

  streamWraps.forEach(function (ev) {
    var handlerKey = "on" + ev;
    Object.defineProperty(stream, handlerKey, {
      get: function () {
        return stream._parser[handlerKey];
      },
      set: function (h) {
        if (h) {
          stream.on(ev, h);
          return;
        }
        stream.removeAllListeners(ev);
        return stream._parser[handlerKey] = h;
      },
      enumerable: true,
      configurable: false
    });
  });
}
|
|
|
|
// SAXStream inherits from Stream; `constructor` is restored as a
// non-enumerable property pointing back at SAXStream.
SAXStream.prototype = Object.create(Stream.prototype, {
  constructor: {
    value: SAXStream
  }
});
|
|
|
|
// Feeds a chunk to the parser and re-emits it as a "data" event.
// Buffer chunks are decoded as UTF-8 through a lazily created StringDecoder;
// anything else is passed to the parser via its toString(). Always returns
// true.
SAXStream.prototype.write = function (data) {
  var bufferSupported = typeof Buffer === 'function' &&
    typeof Buffer.isBuffer === 'function';

  if (bufferSupported && Buffer.isBuffer(data)) {
    if (!this._decoder) {
      var SD = require('string_decoder').StringDecoder;
      this._decoder = new SD('utf8');
    }
    data = this._decoder.write(data);
  }

  this._parser.write(data.toString());
  this.emit("data", data);
  return true;
};
|
|
|
|
// Ends the stream: writes the optional final chunk, flushes the UTF-8
// decoder, then tells the parser that input is finished. Always returns true.
//
// Fix: write() keeps incomplete multi-byte sequences inside the
// StringDecoder until more bytes arrive. Previously that pending input was
// never flushed, so a stream ending mid-character silently lost its tail.
// The decoder's end() result is now written to the parser and re-emitted as
// "data", exactly like a normal chunk.
SAXStream.prototype.end = function (chunk) {
  if (chunk && chunk.length) this.write(chunk);

  if (this._decoder && typeof this._decoder.end === 'function') {
    var remaining = this._decoder.end();
    if (remaining) {
      this._parser.write(remaining);
      this.emit("data", remaining);
    }
  }

  this._parser.end();
  return true;
};
|
|
|
|
// Subscribes a listener. The first time a forwardable parser event is
// subscribed to (and the parser has no handler for it yet), a forwarding
// handler is installed on the parser that re-emits the event, with all of its
// arguments, on the stream. Returns whatever Stream.prototype.on returns.
SAXStream.prototype.on = function (ev, handler) {
  var stream = this;
  var handlerKey = "on" + ev;
  var forwardable = streamWraps.indexOf(ev) !== -1;

  if (!stream._parser[handlerKey] && forwardable) {
    stream._parser[handlerKey] = function () {
      var emitArgs = Array.prototype.slice.call(arguments);
      emitArgs.unshift(ev);
      stream.emit.apply(stream, emitArgs);
    };
  }

  return Stream.prototype.on.call(stream, ev, handler);
};
|
|
|
|
|
|
|
|
// character classes and tokens
|
|
// Raw character sets. The ones used as lookup tables are converted to
// character-class objects below, after the derived sets have been built
// from the raw strings.
var whitespace = "\r\n\t ";
// This really needs to be replaced with character classes: XML allows all
// manner of ridiculous numbers and digits.
var number = "0124356789";
var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
// (Letter | "_" | ":")
var quote = "'\"";
var entity = number + letter + "#";
var attribEnd = whitespace + ">";
var CDATA = "[CDATA[";
var DOCTYPE = "DOCTYPE";
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE };

// Turn the string character sets into character-class objects.
whitespace = charClass(whitespace);
number = charClass(number);
letter = charClass(letter);

// http://www.w3.org/TR/REC-xml/#NT-NameStartChar
// This implementation works on strings one character at a time, so it cannot
// ever support astral-plane characters (10000-EFFFF) without a significant
// breaking change to either this parser or the JavaScript language.
// Implementation of an emoji-capable xml parser is left as an exercise for
// the reader.
var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;

var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/;

quote = charClass(quote);
entity = charClass(entity);
attribEnd = charClass(attribEnd);
|
|
|
|
// Builds a lookup object whose keys are the characters (UTF-16 code units)
// of `str`, each mapped to true.
function charClass (str) {
  var members = {};
  for (var i = 0; i < str.length; i += 1) {
    members[str.charAt(i)] = true;
  }
  return members;
}
|
|
|
|
// True when `c` is a RegExp, judged by its Object.prototype.toString tag.
function isRegExp (c) {
  var tag = Object.prototype.toString.call(c);
  return tag === '[object RegExp]';
}
|
|
|
|
// Tests whether character `c` belongs to `charclass`. A RegExp class yields
// a strict boolean; a lookup-object class yields the stored value (true, or
// undefined when the character is absent).
function is (charclass, c) {
  if (isRegExp(charclass)) {
    return !!c.match(charclass);
  }
  return charclass[c];
}
|
|
|
|
// Boolean negation of is(): true when `c` is not in `charclass`.
function not (charclass, c) {
  var member = is(charclass, c);
  return !member;
}
|
|
|
|
// Parser states, numbered consecutively from 0 in the order listed.
var S = 0;
sax.STATE = {};
[
  "BEGIN",
  "TEXT",                  // general stuff
  "TEXT_ENTITY",           // &amp and such.
  "OPEN_WAKA",             // <
  "SGML_DECL",             // <!BLARG
  "SGML_DECL_QUOTED",      // <!BLARG foo "bar
  "DOCTYPE",               // <!DOCTYPE
  "DOCTYPE_QUOTED",        // <!DOCTYPE "//blah
  "DOCTYPE_DTD",           // <!DOCTYPE "//blah" [ ...
  "DOCTYPE_DTD_QUOTED",    // <!DOCTYPE "//blah" [ "foo
  "COMMENT_STARTING",      // <!-
  "COMMENT",               // <!--
  "COMMENT_ENDING",        // <!-- blah -
  "COMMENT_ENDED",         // <!-- blah --
  "CDATA",                 // <![CDATA[ something
  "CDATA_ENDING",          // ]
  "CDATA_ENDING_2",        // ]]
  "PROC_INST",             // <?hi
  "PROC_INST_BODY",        // <?hi there
  "PROC_INST_ENDING",      // <?hi "there" ?
  "OPEN_TAG",              // <strong
  "OPEN_TAG_SLASH",        // <strong /
  "ATTRIB",                // <a
  "ATTRIB_NAME",           // <a foo
  "ATTRIB_NAME_SAW_WHITE", // <a foo _
  "ATTRIB_VALUE",          // <a foo=
  "ATTRIB_VALUE_QUOTED",   // <a foo="bar
  "ATTRIB_VALUE_CLOSED",   // <a foo="bar"
  "ATTRIB_VALUE_UNQUOTED", // <a foo=bar
  "ATTRIB_VALUE_ENTITY_Q", // <foo bar="&quot;"
  "ATTRIB_VALUE_ENTITY_U", // <foo bar=&quot;
  "CLOSE_TAG",             // </a
  "CLOSE_TAG_SAW_WHITE",   // </a   >
  "SCRIPT",                // <script> ...
  "SCRIPT_ENDING"          // <script> ... <
].forEach(function (stateName) {
  sax.STATE[stateName] = S++;
});
|
|
|
|
// Named character entities. The five XML built-ins are stored as characters;
// every other entry is a numeric code point.
sax.ENTITIES = {
  // XML built-ins
  "amp": "&", "gt": ">", "lt": "<", "quot": "\"", "apos": "'",

  // Latin-1 uppercase letters
  "AElig": 198, "Aacute": 193, "Acirc": 194, "Agrave": 192, "Aring": 197,
  "Atilde": 195, "Auml": 196, "Ccedil": 199, "ETH": 208, "Eacute": 201,
  "Ecirc": 202, "Egrave": 200, "Euml": 203, "Iacute": 205, "Icirc": 206,
  "Igrave": 204, "Iuml": 207, "Ntilde": 209, "Oacute": 211, "Ocirc": 212,
  "Ograve": 210, "Oslash": 216, "Otilde": 213, "Ouml": 214, "THORN": 222,
  "Uacute": 218, "Ucirc": 219, "Ugrave": 217, "Uuml": 220, "Yacute": 221,

  // Latin-1 lowercase letters
  "aacute": 225, "acirc": 226, "aelig": 230, "agrave": 224, "aring": 229,
  "atilde": 227, "auml": 228, "ccedil": 231, "eacute": 233, "ecirc": 234,
  "egrave": 232, "eth": 240, "euml": 235, "iacute": 237, "icirc": 238,
  "igrave": 236, "iuml": 239, "ntilde": 241, "oacute": 243, "ocirc": 244,
  "ograve": 242, "oslash": 248, "otilde": 245, "ouml": 246, "szlig": 223,
  "thorn": 254, "uacute": 250, "ucirc": 251, "ugrave": 249, "uuml": 252,
  "yacute": 253, "yuml": 255,

  // Latin-1 symbols
  "copy": 169, "reg": 174, "nbsp": 160, "iexcl": 161, "cent": 162,
  "pound": 163, "curren": 164, "yen": 165, "brvbar": 166, "sect": 167,
  "uml": 168, "ordf": 170, "laquo": 171, "not": 172, "shy": 173,
  "macr": 175, "deg": 176, "plusmn": 177, "sup1": 185, "sup2": 178,
  "sup3": 179, "acute": 180, "micro": 181, "para": 182, "middot": 183,
  "cedil": 184, "ordm": 186, "raquo": 187, "frac14": 188, "frac12": 189,
  "frac34": 190, "iquest": 191, "times": 215, "divide": 247,

  // Latin extended and spacing modifiers
  "OElig": 338, "oelig": 339, "Scaron": 352, "scaron": 353, "Yuml": 376,
  "fnof": 402, "circ": 710, "tilde": 732,

  // Greek uppercase
  "Alpha": 913, "Beta": 914, "Gamma": 915, "Delta": 916, "Epsilon": 917,
  "Zeta": 918, "Eta": 919, "Theta": 920, "Iota": 921, "Kappa": 922,
  "Lambda": 923, "Mu": 924, "Nu": 925, "Xi": 926, "Omicron": 927,
  "Pi": 928, "Rho": 929, "Sigma": 931, "Tau": 932, "Upsilon": 933,
  "Phi": 934, "Chi": 935, "Psi": 936, "Omega": 937,

  // Greek lowercase and variants
  "alpha": 945, "beta": 946, "gamma": 947, "delta": 948, "epsilon": 949,
  "zeta": 950, "eta": 951, "theta": 952, "iota": 953, "kappa": 954,
  "lambda": 955, "mu": 956, "nu": 957, "xi": 958, "omicron": 959,
  "pi": 960, "rho": 961, "sigmaf": 962, "sigma": 963, "tau": 964,
  "upsilon": 965, "phi": 966, "chi": 967, "psi": 968, "omega": 969,
  "thetasym": 977, "upsih": 978, "piv": 982,

  // General punctuation and spacing
  "ensp": 8194, "emsp": 8195, "thinsp": 8201, "zwnj": 8204, "zwj": 8205,
  "lrm": 8206, "rlm": 8207, "ndash": 8211, "mdash": 8212, "lsquo": 8216,
  "rsquo": 8217, "sbquo": 8218, "ldquo": 8220, "rdquo": 8221, "bdquo": 8222,
  "dagger": 8224, "Dagger": 8225, "bull": 8226, "hellip": 8230,
  "permil": 8240, "prime": 8242, "Prime": 8243, "lsaquo": 8249,
  "rsaquo": 8250, "oline": 8254, "frasl": 8260,

  // Currency and letterlike symbols
  "euro": 8364, "image": 8465, "weierp": 8472, "real": 8476, "trade": 8482,
  "alefsym": 8501,

  // Arrows
  "larr": 8592, "uarr": 8593, "rarr": 8594, "darr": 8595, "harr": 8596,
  "crarr": 8629, "lArr": 8656, "uArr": 8657, "rArr": 8658, "dArr": 8659,
  "hArr": 8660,

  // Mathematical operators
  "forall": 8704, "part": 8706, "exist": 8707, "empty": 8709, "nabla": 8711,
  "isin": 8712, "notin": 8713, "ni": 8715, "prod": 8719, "sum": 8721,
  "minus": 8722, "lowast": 8727, "radic": 8730, "prop": 8733, "infin": 8734,
  "ang": 8736, "and": 8743, "or": 8744, "cap": 8745, "cup": 8746,
  "int": 8747, "there4": 8756, "sim": 8764, "cong": 8773, "asymp": 8776,
  "ne": 8800, "equiv": 8801, "le": 8804, "ge": 8805, "sub": 8834,
  "sup": 8835, "nsub": 8836, "sube": 8838, "supe": 8839, "oplus": 8853,
  "otimes": 8855, "perp": 8869, "sdot": 8901,

  // Technical and geometric symbols, card suits
  "lceil": 8968, "rceil": 8969, "lfloor": 8970, "rfloor": 8971,
  "lang": 9001, "rang": 9002, "loz": 9674, "spades": 9824, "clubs": 9827,
  "hearts": 9829, "diams": 9830
};
|
|
|
|
// Replace every numeric code point in the entity table with the character it
// denotes; entries that are already strings are left as they are.
Object.keys(sax.ENTITIES).forEach(function (name) {
  var value = sax.ENTITIES[name];
  if (typeof value === 'number') {
    value = String.fromCharCode(value);
  }
  sax.ENTITIES[name] = value;
});

// Add the reverse (state number -> state name) mapping to the state table.
for (var S in sax.STATE) {
  sax.STATE[sax.STATE[S]] = S;
}

// shorthand
S = sax.STATE;
|
|
|
|
// Invokes the handler stored on the parser under `event` (for example
// "ontext") with `data`, if such a handler is set. The handler runs with the
// parser as `this`.
function emit (parser, event, data) {
  if (parser[event]) {
    parser[event](data);
  }
}
|
|
|
|
// Emits a node event, first flushing any pending text so that the text event
// is delivered before the node that follows it.
function emitNode (parser, nodeType, data) {
  var hasPendingText = !!parser.textNode;
  if (hasPendingText) {
    closeText(parser);
  }
  emit(parser, nodeType, data);
}
|
|
|
|
// Finishes the current text node: applies the trim/normalize options, emits
// "ontext" if anything is left, and empties the text buffer.
function closeText (parser) {
  var text = textopts(parser.opt, parser.textNode);
  // Stored before emitting so that handlers see the processed text on the
  // parser.
  parser.textNode = text;
  if (text) {
    emit(parser, "ontext", parser.textNode);
  }
  parser.textNode = "";
}
|
|
|
|
// Applies the text options to `text`: `opt.trim` strips leading and trailing
// whitespace, then `opt.normalize` collapses every whitespace run to a single
// space. Returns the processed string.
function textopts (opt, text) {
  var result = text;
  if (opt.trim) {
    result = result.trim();
  }
  if (opt.normalize) {
    result = result.replace(/\s+/g, " ");
  }
  return result;
}
|
|
|
|
// Reports a parse error. Pending text is flushed first; when position
// tracking is enabled the message gets line, column and current-character
// details appended. The resulting Error is stored on the parser and passed to
// "onerror". Returns the parser.
function error (parser, er) {
  closeText(parser);

  var message = er;
  if (parser.trackPosition) {
    message = message +
      "\nLine: " + parser.line +
      "\nColumn: " + parser.column +
      "\nChar: " + parser.c;
  }

  var failure = new Error(message);
  parser.error = failure;
  emit(parser, "onerror", failure);
  return parser;
}
|
|
|
|
// Finishes parsing: reports an unclosed root (strict mode only) and an
// unexpected end when the parser stops in the middle of a construct, flushes
// pending text, marks the parser closed, fires "onend", and finally
// re-initialises the parser in place by re-running its constructor. Returns
// the parser.
function end (parser) {
  if (!parser.closedRoot) {
    strictFail(parser, "Unclosed root tag");
  }

  var atRest = parser.state === S.BEGIN || parser.state === S.TEXT;
  if (!atRest) {
    error(parser, "Unexpected end");
  }

  closeText(parser);
  parser.c = "";
  parser.closed = true;
  emit(parser, "onend");
  SAXParser.call(parser, parser.strict, parser.opt);
  return parser;
}
|
|
|
|
// Reports `message` as an error only when the parser is in strict mode.
// Throws when the first argument is not a SAXParser instance, which guards
// against calls with the arguments in the wrong order.
function strictFail (parser, message) {
  var isParser = typeof parser === 'object' && parser instanceof SAXParser;
  if (!isParser) {
    throw new Error('bad call to strictFail');
  }
  if (parser.strict) {
    error(parser, message);
  }
}
|
|
|
|
// Starts a new tag record from the buffered tag name. In non-strict mode the
// name is first case-folded with the parser's looseCase method. The record
// becomes parser.tag and the deferred attribute list is emptied.
function newTag (parser) {
  if (!parser.strict) {
    parser.tagName = parser.tagName[parser.looseCase]();
  }

  var enclosing = parser.tags[parser.tags.length - 1] || parser;
  var record = { name: parser.tagName, attributes: {} };
  parser.tag = record;

  // Inherit the enclosing scope's namespace bindings; replaced later if the
  // tag contains an xmlns="foo" or xmlns:foo="bar" attribute.
  if (parser.opt.xmlns) {
    record.ns = enclosing.ns;
  }
  parser.attribList.length = 0;
}
|
|
|
|
// Splits a qualified name into { prefix, local }.
// A name without a colon has an empty prefix. For attributes, the bare name
// "xmlns" is treated as prefix "xmlns" with an empty local part
// (<x xmlns="http://foo">). Only the first two colon-separated parts are used.
function qname (name, attribute) {
  if (attribute && name === "xmlns") {
    return { prefix: "xmlns", local: "" };
  }

  if (name.indexOf(":") < 0) {
    return { prefix: "", local: name };
  }

  var parts = name.split(":");
  return { prefix: parts[0], local: parts[1] };
}
|
|
|
|
// Records the attribute currently held in parser.attribName /
// parser.attribValue on the tag being opened, then clears both buffers.
//
// - Non-strict mode case-folds the name first.
// - A name already seen on this tag is dropped (the first occurrence wins).
// - In xmlns mode, xmlns / xmlns:* attributes update the tag's namespace
//   bindings, and the attribute is deferred on parser.attribList so that
//   onattribute fires from openTag once every binding is known.
// - Otherwise the attribute is stored and emitted immediately.
//
// Fixes:
// 1. parser.attribList holds [name, value] pairs, so searching it with
//    indexOf(name) could never match and duplicates went undetected in xmlns
//    mode. The pair names are now compared.
// 2. hasOwnProperty was called on the attributes map itself, so an attribute
//    literally named "hasOwnProperty" made the next attribute throw a
//    TypeError. Object.prototype.hasOwnProperty is used instead.
function attrib (parser) {
  if (!parser.strict) {
    parser.attribName = parser.attribName[parser.looseCase]();
  }

  var name = parser.attribName;
  var alreadyDeferred = parser.attribList.some(function (pair) {
    return pair[0] === name;
  });
  if (alreadyDeferred ||
      Object.prototype.hasOwnProperty.call(parser.tag.attributes, name)) {
    return parser.attribName = parser.attribValue = "";
  }

  if (parser.opt.xmlns) {
    var qn = qname(parser.attribName, true);
    var prefix = qn.prefix;
    var local = qn.local;

    if (prefix === "xmlns") {
      // Namespace binding attribute; push the binding into scope.
      if (local === "xml" && parser.attribValue !== XML_NAMESPACE) {
        strictFail(parser,
          "xml: prefix must be bound to " + XML_NAMESPACE + "\n" +
          "Actual: " + parser.attribValue);
      } else if (local === "xmlns" && parser.attribValue !== XMLNS_NAMESPACE) {
        strictFail(parser,
          "xmlns: prefix must be bound to " + XMLNS_NAMESPACE + "\n" +
          "Actual: " + parser.attribValue);
      } else {
        var tag = parser.tag;
        var parent = parser.tags[parser.tags.length - 1] || parser;
        // Give the tag its own bindings object the first time it declares a
        // namespace, inheriting from the enclosing scope.
        if (tag.ns === parent.ns) {
          tag.ns = Object.create(parent.ns);
        }
        tag.ns[local] = parser.attribValue;
      }
    }

    // Defer onattribute events until all attributes have been seen so that
    // any new bindings can take effect; keep document order so the deferred
    // events can be emitted in that order.
    parser.attribList.push([parser.attribName, parser.attribValue]);
  } else {
    // In non-xmlns mode the event can be emitted right away.
    parser.tag.attributes[parser.attribName] = parser.attribValue;
    emitNode(parser, "onattribute", {
      name: parser.attribName,
      value: parser.attribValue
    });
  }

  parser.attribName = parser.attribValue = "";
}
|
|
|
|
// Completes the tag being opened and emits "onopentag".
//
// In xmlns mode this first resolves the tag's prefix/local/uri, emits
// "onopennamespace" for every binding the tag itself declares, and then
// emits the onattribute events that attrib() deferred, each with resolved
// namespace information. The tag is pushed on the open-tag stack; unless it
// is self-closing the parser moves to TEXT (or SCRIPT for a <script> tag when
// script handling is enabled) and the current-tag state is cleared.
function openTag (parser, selfClosing) {
  if (parser.opt.xmlns) {
    var tag = parser.tag;

    // Add namespace info to the tag.
    var tagQName = qname(parser.tagName);
    tag.prefix = tagQName.prefix;
    tag.local = tagQName.local;
    tag.uri = tag.ns[tagQName.prefix] || "";

    if (tag.prefix && !tag.uri) {
      strictFail(parser,
        "Unbound namespace prefix: " + JSON.stringify(parser.tagName));
      tag.uri = tagQName.prefix;
    }

    // Emit namespace binding events for bindings introduced by this tag.
    var enclosing = parser.tags[parser.tags.length - 1] || parser;
    if (tag.ns && enclosing.ns !== tag.ns) {
      Object.keys(tag.ns).forEach(function (boundPrefix) {
        emitNode(parser, "onopennamespace", {
          prefix: boundPrefix,
          uri: tag.ns[boundPrefix]
        });
      });
    }

    // Handle deferred onattribute events.
    // Note: the default namespace is not applied to attributes:
    // http://www.w3.org/TR/REC-xml-names/#defaulting
    var pending = parser.attribList.length;
    for (var idx = 0; idx < pending; idx += 1) {
      var pair = parser.attribList[idx];
      var attrName = pair[0];
      var attrValue = pair[1];
      var attrQName = qname(attrName, true);
      var attrPrefix = attrQName.prefix;

      var attrUri = "";
      if (attrPrefix !== "") {
        attrUri = tag.ns[attrPrefix] || "";
      }

      var attribute = {
        name: attrName,
        value: attrValue,
        prefix: attrPrefix,
        local: attrQName.local,
        uri: attrUri
      };

      // Attributes with an unbound prefix fail now (strict mode) and fall
      // back to using the prefix itself as the uri.
      if (attrPrefix && attrPrefix !== "xmlns" && !attrUri) {
        strictFail(parser,
          "Unbound namespace prefix: " + JSON.stringify(attrPrefix));
        attribute.uri = attrPrefix;
      }

      parser.tag.attributes[attrName] = attribute;
      emitNode(parser, "onattribute", attribute);
    }
    parser.attribList.length = 0;
  }

  parser.tag.isSelfClosing = !!selfClosing;

  // Process the tag.
  parser.sawRoot = true;
  parser.tags.push(parser.tag);
  emitNode(parser, "onopentag", parser.tag);

  if (!selfClosing) {
    // Special case for <script> in non-strict mode.
    var entersScript = !parser.noscript &&
      parser.tagName.toLowerCase() === "script";
    parser.state = entersScript ? S.SCRIPT : S.TEXT;
    parser.tag = null;
    parser.tagName = "";
  }

  parser.attribName = parser.attribValue = "";
  parser.attribList.length = 0;
}
|
|
|
|
// Handles a completed closing tag whose name is in parser.tagName.
//
// - An empty name (</>) is a strict failure and is kept as literal text.
// - While script content is buffered, any closing tag other than the script
//   tag is appended to the script text; the script closing tag flushes the
//   buffer through "onscript".
// - Otherwise the open-tag stack is searched from the top for a matching
//   name. When none is found the tag is kept as literal text. When one is
//   found, every tag above it and the match itself are popped, emitting
//   "onclosetag" (and, in xmlns mode, "onclosenamespace" for each binding the
//   tag introduced).
//
// Changes from the previous version:
// 1. The script closing tag is now recognised case-insensitively, matching
//    how openTag() enters script mode; "</SCRIPT>" previously never ended a
//    script and the rest of the document was swallowed as script text.
// 2. Removed a per-tag copy of tag.ns that was built and never read.
function closeTag (parser) {
  if (!parser.tagName) {
    strictFail(parser, "Weird empty close tag.");
    parser.textNode += "</>";
    parser.state = S.TEXT;
    return;
  }

  if (parser.script) {
    if (parser.tagName.toLowerCase() !== "script") {
      parser.script += "</" + parser.tagName + ">";
      parser.tagName = "";
      parser.state = S.SCRIPT;
      return;
    }
    emitNode(parser, "onscript", parser.script);
    parser.script = "";
  }

  // First make sure that the closing tag actually exists.
  // <a><b></c></b></a> will close everything, otherwise.
  var t = parser.tags.length;
  var tagName = parser.tagName;
  if (!parser.strict) tagName = tagName[parser.looseCase]();
  var closeTo = tagName;
  while (t--) {
    var close = parser.tags[t];
    if (close.name !== closeTo) {
      // Strict mode reports every open tag that the close tag skips over.
      strictFail(parser, "Unexpected close tag");
    } else break;
  }

  // Didn't find it. Strict mode has already failed above, so just abort.
  if (t < 0) {
    strictFail(parser, "Unmatched closing tag: " + parser.tagName);
    parser.textNode += "</" + parser.tagName + ">";
    parser.state = S.TEXT;
    return;
  }
  parser.tagName = tagName;

  // Pop down to, and including, the matching tag at index t.
  var s = parser.tags.length;
  while (s-- > t) {
    var tag = parser.tag = parser.tags.pop();
    parser.tagName = parser.tag.name;
    emitNode(parser, "onclosetag", parser.tagName);

    var parent = parser.tags[parser.tags.length - 1] || parser;
    if (parser.opt.xmlns && tag.ns !== parent.ns) {
      // Remove namespace bindings introduced by the tag.
      Object.keys(tag.ns).forEach(function (p) {
        var n = tag.ns[p];
        emitNode(parser, "onclosenamespace", { prefix: p, uri: n });
      });
    }
  }

  if (t === 0) parser.closedRoot = true;
  parser.tagName = parser.attribValue = parser.attribName = "";
  parser.attribList.length = 0;
  parser.state = S.TEXT;
}
|
|
|
|
// Resolves the entity name buffered in parser.entity to its replacement
// text.
//
// Named entities are looked up in parser.ENTITIES, first as written and then
// lower-cased. Numeric character references (#NNN decimal, #xHHH hex) are
// converted to the corresponding character. Anything else is a strict
// failure and is returned unchanged as "&name;".
//
// Fix: the numeric path only compared the re-serialised number with the
// input, so inputs such as "&;", "&#NaN;", negative values or values above
// 0x10FFFF reached String.fromCodePoint and threw a RangeError out of
// write(). Those are now rejected as invalid character entities.
function parseEntity (parser) {
  var entity = parser.entity;
  var entityLC = entity.toLowerCase();
  var num;
  var numStr = "";

  if (parser.ENTITIES[entity]) {
    return parser.ENTITIES[entity];
  }
  if (parser.ENTITIES[entityLC]) {
    return parser.ENTITIES[entityLC];
  }

  entity = entityLC;
  if (entity.charAt(0) === "#") {
    if (entity.charAt(1) === "x") {
      entity = entity.slice(2);
      num = parseInt(entity, 16);
      numStr = num.toString(16);
    } else {
      entity = entity.slice(1);
      num = parseInt(entity, 10);
      numStr = num.toString(10);
    }
  }

  // Leading zeros are allowed; compare against the canonical digits so that
  // trailing garbage (e.g. "#12ab") is rejected.
  entity = entity.replace(/^0+/, "");
  if (isNaN(num) ||
      numStr.toLowerCase() !== entity ||
      num < 0 ||
      num > 0x10FFFF) {
    strictFail(parser, "Invalid character entity");
    return "&" + parser.entity + ";";
  }

  return String.fromCodePoint(num);
}
|
|
|
|
function write (chunk) {
|
|
var parser = this
|
|
if (this.error) throw this.error
|
|
if (parser.closed) return error(parser,
|
|
"Cannot write after close. Assign an onready handler.")
|
|
if (chunk === null) return end(parser)
|
|
var i = 0, c = ""
|
|
while (parser.c = c = chunk.charAt(i++)) {
|
|
if (parser.trackPosition) {
|
|
parser.position ++
|
|
if (c === "\n") {
|
|
parser.line ++
|
|
parser.column = 0
|
|
} else parser.column ++
|
|
}
|
|
switch (parser.state) {
|
|
|
|
case S.BEGIN:
|
|
if (c === "<") {
|
|
parser.state = S.OPEN_WAKA
|
|
parser.startTagPosition = parser.position
|
|
} else if (not(whitespace,c)) {
|
|
// have to process this as a text node.
|
|
// weird, but happens.
|
|
strictFail(parser, "Non-whitespace before first tag.")
|
|
parser.textNode = c
|
|
parser.state = S.TEXT
|
|
}
|
|
continue
|
|
|
|
case S.TEXT:
|
|
if (parser.sawRoot && !parser.closedRoot) {
|
|
var starti = i-1
|
|
while (c && c!=="<" && c!=="&") {
|
|
c = chunk.charAt(i++)
|
|
if (c && parser.trackPosition) {
|
|
parser.position ++
|
|
if (c === "\n") {
|
|
parser.line ++
|
|
parser.column = 0
|
|
} else parser.column ++
|
|
}
|
|
}
|
|
parser.textNode += chunk.substring(starti, i-1)
|
|
}
|
|
if (c === "<") {
|
|
parser.state = S.OPEN_WAKA
|
|
parser.startTagPosition = parser.position
|
|
} else {
|
|
if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot))
|
|
strictFail(parser, "Text data outside of root node.")
|
|
if (c === "&") parser.state = S.TEXT_ENTITY
|
|
else parser.textNode += c
|
|
}
|
|
continue
|
|
|
|
case S.SCRIPT:
|
|
// only non-strict
|
|
if (c === "<") {
|
|
parser.state = S.SCRIPT_ENDING
|
|
} else parser.script += c
|
|
continue
|
|
|
|
case S.SCRIPT_ENDING:
|
|
if (c === "/") {
|
|
parser.state = S.CLOSE_TAG
|
|
} else {
|
|
parser.script += "<" + c
|
|
parser.state = S.SCRIPT
|
|
}
|
|
continue
|
|
|
|
case S.OPEN_WAKA:
|
|
// either a /, ?, !, or text is coming next.
|
|
if (c === "!") {
|
|
parser.state = S.SGML_DECL
|
|
parser.sgmlDecl = ""
|
|
} else if (is(whitespace, c)) {
|
|
// wait for it...
|
|
} else if (is(nameStart,c)) {
|
|
parser.state = S.OPEN_TAG
|
|
parser.tagName = c
|
|
} else if (c === "/") {
|
|
parser.state = S.CLOSE_TAG
|
|
parser.tagName = ""
|
|
} else if (c === "?") {
|
|
parser.state = S.PROC_INST
|
|
parser.procInstName = parser.procInstBody = ""
|
|
} else {
|
|
strictFail(parser, "Unencoded <")
|
|
// if there was some whitespace, then add that in.
|
|
if (parser.startTagPosition + 1 < parser.position) {
|
|
var pad = parser.position - parser.startTagPosition
|
|
c = new Array(pad).join(" ") + c
|
|
}
|
|
parser.textNode += "<" + c
|
|
parser.state = S.TEXT
|
|
}
|
|
continue
|
|
|
|
case S.SGML_DECL:
|
|
if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
|
|
emitNode(parser, "onopencdata")
|
|
parser.state = S.CDATA
|
|
parser.sgmlDecl = ""
|
|
parser.cdata = ""
|
|
} else if (parser.sgmlDecl+c === "--") {
|
|
parser.state = S.COMMENT
|
|
parser.comment = ""
|
|
parser.sgmlDecl = ""
|
|
} else if ((parser.sgmlDecl+c).toUpperCase() === DOCTYPE) {
|
|
parser.state = S.DOCTYPE
|
|
if (parser.doctype || parser.sawRoot) strictFail(parser,
|
|
"Inappropriately located doctype declaration")
|
|
parser.doctype = ""
|
|
parser.sgmlDecl = ""
|
|
} else if (c === ">") {
|
|
emitNode(parser, "onsgmldeclaration", parser.sgmlDecl)
|
|
parser.sgmlDecl = ""
|
|
parser.state = S.TEXT
|
|
} else if (is(quote, c)) {
|
|
parser.state = S.SGML_DECL_QUOTED
|
|
parser.sgmlDecl += c
|
|
} else parser.sgmlDecl += c
|
|
continue
|
|
|
|
case S.SGML_DECL_QUOTED:
|
|
if (c === parser.q) {
|
|
parser.state = S.SGML_DECL
|
|
parser.q = ""
|
|
}
|
|
parser.sgmlDecl += c
|
|
continue
|
|
|
|
case S.DOCTYPE:
|
|
if (c === ">") {
|
|
parser.state = S.TEXT
|
|
emitNode(parser, "ondoctype", parser.doctype)
|
|
parser.doctype = true // just remember that we saw it.
|
|
} else {
|
|
parser.doctype += c
|
|
if (c === "[") parser.state = S.DOCTYPE_DTD
|
|
else if (is(quote, c)) {
|
|
parser.state = S.DOCTYPE_QUOTED
|
|
parser.q = c
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.DOCTYPE_QUOTED:
|
|
parser.doctype += c
|
|
if (c === parser.q) {
|
|
parser.q = ""
|
|
parser.state = S.DOCTYPE
|
|
}
|
|
continue
|
|
|
|
case S.DOCTYPE_DTD:
|
|
parser.doctype += c
|
|
if (c === "]") parser.state = S.DOCTYPE
|
|
else if (is(quote,c)) {
|
|
parser.state = S.DOCTYPE_DTD_QUOTED
|
|
parser.q = c
|
|
}
|
|
continue
|
|
|
|
case S.DOCTYPE_DTD_QUOTED:
|
|
parser.doctype += c
|
|
if (c === parser.q) {
|
|
parser.state = S.DOCTYPE_DTD
|
|
parser.q = ""
|
|
}
|
|
continue
|
|
|
|
case S.COMMENT:
|
|
if (c === "-") parser.state = S.COMMENT_ENDING
|
|
else parser.comment += c
|
|
continue
|
|
|
|
case S.COMMENT_ENDING:
|
|
if (c === "-") {
|
|
parser.state = S.COMMENT_ENDED
|
|
parser.comment = textopts(parser.opt, parser.comment)
|
|
if (parser.comment) emitNode(parser, "oncomment", parser.comment)
|
|
parser.comment = ""
|
|
} else {
|
|
parser.comment += "-" + c
|
|
parser.state = S.COMMENT
|
|
}
|
|
continue
|
|
|
|
case S.COMMENT_ENDED:
|
|
if (c !== ">") {
|
|
strictFail(parser, "Malformed comment")
|
|
// allow <!-- blah -- bloo --> in non-strict mode,
|
|
// which is a comment of " blah -- bloo "
|
|
parser.comment += "--" + c
|
|
parser.state = S.COMMENT
|
|
} else parser.state = S.TEXT
|
|
continue
|
|
|
|
case S.CDATA:
|
|
if (c === "]") parser.state = S.CDATA_ENDING
|
|
else parser.cdata += c
|
|
continue
|
|
|
|
case S.CDATA_ENDING:
|
|
if (c === "]") parser.state = S.CDATA_ENDING_2
|
|
else {
|
|
parser.cdata += "]" + c
|
|
parser.state = S.CDATA
|
|
}
|
|
continue
|
|
|
|
case S.CDATA_ENDING_2:
|
|
if (c === ">") {
|
|
if (parser.cdata) emitNode(parser, "oncdata", parser.cdata)
|
|
emitNode(parser, "onclosecdata")
|
|
parser.cdata = ""
|
|
parser.state = S.TEXT
|
|
} else if (c === "]") {
|
|
parser.cdata += "]"
|
|
} else {
|
|
parser.cdata += "]]" + c
|
|
parser.state = S.CDATA
|
|
}
|
|
continue
|
|
|
|
case S.PROC_INST:
|
|
if (c === "?") parser.state = S.PROC_INST_ENDING
|
|
else if (is(whitespace, c)) parser.state = S.PROC_INST_BODY
|
|
else parser.procInstName += c
|
|
continue
|
|
|
|
case S.PROC_INST_BODY:
|
|
if (!parser.procInstBody && is(whitespace, c)) continue
|
|
else if (c === "?") parser.state = S.PROC_INST_ENDING
|
|
else parser.procInstBody += c
|
|
continue
|
|
|
|
case S.PROC_INST_ENDING:
|
|
if (c === ">") {
|
|
emitNode(parser, "onprocessinginstruction", {
|
|
name : parser.procInstName,
|
|
body : parser.procInstBody
|
|
})
|
|
parser.procInstName = parser.procInstBody = ""
|
|
parser.state = S.TEXT
|
|
} else {
|
|
parser.procInstBody += "?" + c
|
|
parser.state = S.PROC_INST_BODY
|
|
}
|
|
continue
|
|
|
|
case S.OPEN_TAG:
|
|
if (is(nameBody, c)) parser.tagName += c
|
|
else {
|
|
newTag(parser)
|
|
if (c === ">") openTag(parser)
|
|
else if (c === "/") parser.state = S.OPEN_TAG_SLASH
|
|
else {
|
|
if (not(whitespace, c)) strictFail(
|
|
parser, "Invalid character in tag name")
|
|
parser.state = S.ATTRIB
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.OPEN_TAG_SLASH:
|
|
if (c === ">") {
|
|
openTag(parser, true)
|
|
closeTag(parser)
|
|
} else {
|
|
strictFail(parser, "Forward-slash in opening tag not followed by >")
|
|
parser.state = S.ATTRIB
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB:
|
|
// haven't read the attribute name yet.
|
|
if (is(whitespace, c)) continue
|
|
else if (c === ">") openTag(parser)
|
|
else if (c === "/") parser.state = S.OPEN_TAG_SLASH
|
|
else if (is(nameStart, c)) {
|
|
parser.attribName = c
|
|
parser.attribValue = ""
|
|
parser.state = S.ATTRIB_NAME
|
|
} else strictFail(parser, "Invalid attribute name")
|
|
continue
|
|
|
|
case S.ATTRIB_NAME:
|
|
if (c === "=") parser.state = S.ATTRIB_VALUE
|
|
else if (c === ">") {
|
|
strictFail(parser, "Attribute without value")
|
|
parser.attribValue = parser.attribName
|
|
attrib(parser)
|
|
openTag(parser)
|
|
}
|
|
else if (is(whitespace, c)) parser.state = S.ATTRIB_NAME_SAW_WHITE
|
|
else if (is(nameBody, c)) parser.attribName += c
|
|
else strictFail(parser, "Invalid attribute name")
|
|
continue
|
|
|
|
case S.ATTRIB_NAME_SAW_WHITE:
|
|
if (c === "=") parser.state = S.ATTRIB_VALUE
|
|
else if (is(whitespace, c)) continue
|
|
else {
|
|
strictFail(parser, "Attribute without value")
|
|
parser.tag.attributes[parser.attribName] = ""
|
|
parser.attribValue = ""
|
|
emitNode(parser, "onattribute",
|
|
{ name : parser.attribName, value : "" })
|
|
parser.attribName = ""
|
|
if (c === ">") openTag(parser)
|
|
else if (is(nameStart, c)) {
|
|
parser.attribName = c
|
|
parser.state = S.ATTRIB_NAME
|
|
} else {
|
|
strictFail(parser, "Invalid attribute name")
|
|
parser.state = S.ATTRIB
|
|
}
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE:
|
|
if (is(whitespace, c)) continue
|
|
else if (is(quote, c)) {
|
|
parser.q = c
|
|
parser.state = S.ATTRIB_VALUE_QUOTED
|
|
} else {
|
|
strictFail(parser, "Unquoted attribute value")
|
|
parser.state = S.ATTRIB_VALUE_UNQUOTED
|
|
parser.attribValue = c
|
|
}
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE_QUOTED:
|
|
if (c !== parser.q) {
|
|
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_Q
|
|
else parser.attribValue += c
|
|
continue
|
|
}
|
|
attrib(parser)
|
|
parser.q = ""
|
|
parser.state = S.ATTRIB_VALUE_CLOSED
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE_CLOSED:
|
|
if (is(whitespace, c)) {
|
|
parser.state = S.ATTRIB
|
|
} else if (c === ">") openTag(parser)
|
|
else if (c === "/") parser.state = S.OPEN_TAG_SLASH
|
|
else if (is(nameStart, c)) {
|
|
strictFail(parser, "No whitespace between attributes")
|
|
parser.attribName = c
|
|
parser.attribValue = ""
|
|
parser.state = S.ATTRIB_NAME
|
|
} else strictFail(parser, "Invalid attribute name")
|
|
continue
|
|
|
|
case S.ATTRIB_VALUE_UNQUOTED:
|
|
if (not(attribEnd,c)) {
|
|
if (c === "&") parser.state = S.ATTRIB_VALUE_ENTITY_U
|
|
else parser.attribValue += c
|
|
continue
|
|
}
|
|
attrib(parser)
|
|
if (c === ">") openTag(parser)
|
|
else parser.state = S.ATTRIB
|
|
continue
|
|
|
|
case S.CLOSE_TAG:
|
|
if (!parser.tagName) {
|
|
if (is(whitespace, c)) continue
|
|
else if (not(nameStart, c)) {
|
|
if (parser.script) {
|
|
parser.script += "</" + c
|
|
parser.state = S.SCRIPT
|
|
} else {
|
|
strictFail(parser, "Invalid tagname in closing tag.")
|
|
}
|
|
} else parser.tagName = c
|
|
}
|
|
else if (c === ">") closeTag(parser)
|
|
else if (is(nameBody, c)) parser.tagName += c
|
|
else if (parser.script) {
|
|
parser.script += "</" + parser.tagName
|
|
parser.tagName = ""
|
|
parser.state = S.SCRIPT
|
|
} else {
|
|
if (not(whitespace, c)) strictFail(parser,
|
|
"Invalid tagname in closing tag")
|
|
parser.state = S.CLOSE_TAG_SAW_WHITE
|
|
}
|
|
continue
|
|
|
|
case S.CLOSE_TAG_SAW_WHITE:
|
|
if (is(whitespace, c)) continue
|
|
if (c === ">") closeTag(parser)
|
|
else strictFail(parser, "Invalid characters in closing tag")
|
|
continue
|
|
|
|
case S.TEXT_ENTITY:
|
|
case S.ATTRIB_VALUE_ENTITY_Q:
|
|
case S.ATTRIB_VALUE_ENTITY_U:
|
|
switch(parser.state) {
|
|
case S.TEXT_ENTITY:
|
|
var returnState = S.TEXT, buffer = "textNode"
|
|
break
|
|
|
|
case S.ATTRIB_VALUE_ENTITY_Q:
|
|
var returnState = S.ATTRIB_VALUE_QUOTED, buffer = "attribValue"
|
|
break
|
|
|
|
case S.ATTRIB_VALUE_ENTITY_U:
|
|
var returnState = S.ATTRIB_VALUE_UNQUOTED, buffer = "attribValue"
|
|
break
|
|
}
|
|
if (c === ";") {
|
|
parser[buffer] += parseEntity(parser)
|
|
parser.entity = ""
|
|
parser.state = returnState
|
|
}
|
|
else if (is(entity, c)) parser.entity += c
|
|
else {
|
|
strictFail(parser, "Invalid character entity")
|
|
parser[buffer] += "&" + parser.entity + c
|
|
parser.entity = ""
|
|
parser.state = returnState
|
|
}
|
|
continue
|
|
|
|
default:
|
|
throw new Error(parser, "Unknown state: " + parser.state)
|
|
}
|
|
} // while
|
|
// cdata blocks can get very big under normal conditions. emit and move on.
|
|
// if (parser.state === S.CDATA && parser.cdata) {
|
|
// emitNode(parser, "oncdata", parser.cdata)
|
|
// parser.cdata = ""
|
|
// }
|
|
if (parser.position >= parser.bufferCheckPosition) checkBufferLength(parser)
|
|
return parser
|
|
}
|
|
|
|
/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
// Polyfill for `String.fromCodePoint`, installed only when the host does not
// already provide one. Local changes relative to the vendored v0.1.0 code:
// `Object.defineProperty` is probed before use, and the polyfilled function
// reports a `length` of 1.
if (!String.fromCodePoint) {
  (function() {
    // Some legacy hosts (notably IE 8) expose `Object.defineProperty` but
    // throw when it is applied to a non-DOM object. Probe it on a scratch
    // object and fall back to plain assignment when it is missing or broken.
    var defineProperty = (function() {
      try {
        var probe = {};
        var candidate = Object.defineProperty;
        return candidate(probe, 'x', { 'value': 1 }) && candidate;
      } catch (error) {
        return undefined;
      }
    }());
    var stringFromCharCode = String.fromCharCode;
    var floor = Math.floor;

    /**
     * Builds a string from a sequence of Unicode code points.
     *
     * The single named parameter is never read; it exists only so that the
     * function's `length` is 1. All code points are taken from `arguments`.
     *
     * @param {...number} _ Code points in the range 0..0x10FFFF.
     * @returns {string} The string made of the given code points, or '' when
     *   called without arguments.
     * @throws {RangeError} If any argument is not an integer in 0..0x10FFFF.
     */
    var fromCodePoint = function(_) {
      // Upper bound on buffered code units before they are flushed, so the
      // argument list handed to `apply` below stays bounded.
      var MAX_SIZE = 0x4000;
      var codeUnits = [];
      var highSurrogate;
      var lowSurrogate;
      var index = -1;
      var length = arguments.length;
      if (!length) {
        return '';
      }
      var result = '';
      while (++index < length) {
        var codePoint = Number(arguments[index]);
        if (
          !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
          codePoint < 0 || // not a valid Unicode code point
          codePoint > 0x10FFFF || // not a valid Unicode code point
          floor(codePoint) !== codePoint // not an integer
        ) {
          throw new RangeError('Invalid code point: ' + codePoint);
        }
        if (codePoint <= 0xFFFF) { // BMP code point
          codeUnits.push(codePoint);
        } else { // Astral code point; split in surrogate halves
          // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
          codePoint -= 0x10000;
          highSurrogate = (codePoint >> 10) + 0xD800;
          lowSurrogate = (codePoint % 0x400) + 0xDC00;
          codeUnits.push(highSurrogate, lowSurrogate);
        }
        // Flush on the last argument, or once the buffer has grown past
        // MAX_SIZE code units.
        if (index + 1 === length || codeUnits.length > MAX_SIZE) {
          result += stringFromCharCode.apply(null, codeUnits);
          codeUnits.length = 0;
        }
      }
      return result;
    };

    if (defineProperty) {
      // Define it as a non-enumerable, configurable, writable property.
      defineProperty(String, 'fromCodePoint', {
        'value': fromCodePoint,
        'configurable': true,
        'writable': true
      });
    } else {
      String.fromCodePoint = fromCodePoint;
    }
  }());
}
|
|
|
|
})(typeof exports === "undefined" ? sax = {} : exports);
|