;(function (sax) { // wrapper for non-node envs sax.parser = function (strict, opt) { return new SAXParser(strict, opt) } sax.SAXParser = SAXParser sax.SAXStream = SAXStream sax.createStream = createStream // When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. // When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), // since that's the earliest that a buffer overrun could occur. This way, checks are // as rare as required, but as often as necessary to ensure never crossing this bound. // Furthermore, buffers are only tested at most once per write(), so passing a very // large string into write() might have undesirable effects, but this is manageable by // the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme // edge case, result in creating at most one complete copy of the string passed in. // Set to Infinity to have unlimited buffers. sax.MAX_BUFFER_LENGTH = 64 * 1024 var buffers = [ 'comment', 'sgmlDecl', 'textNode', 'tagName', 'doctype', 'procInstName', 'procInstBody', 'entity', 'attribName', 'attribValue', 'cdata', 'script' ] sax.EVENTS = [ 'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment', 'opentagstart', 'attribute', 'opentag', 'closetag', 'opencdata', 'cdata', 'closecdata', 'error', 'end', 'ready', 'script', 'opennamespace', 'closenamespace' ] function SAXParser (strict, opt) { if (!(this instanceof SAXParser)) { return new SAXParser(strict, opt) } var parser = this clearBuffers(parser) parser.q = parser.c = '' parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH parser.opt = opt || {} parser.opt.lowercase = parser.opt.lowercase || parser.opt.lowercasetags parser.looseCase = parser.opt.lowercase ? 'toLowerCase' : 'toUpperCase' parser.tags = [] parser.closed = parser.closedRoot = parser.sawRoot = false parser.tag = parser.error = null parser.strict = !!strict parser.noscript = !!(strict || parser.opt.noscript) parser.state = S.BEGIN parser.strictEntities = parser.opt.strictEntities parser.ENTITIES = parser.strictEntities ? Object.create(sax.XML_ENTITIES) : Object.create(sax.ENTITIES) parser.attribList = [] // namespaces form a prototype chain. // it always points at the current tag, // which protos to its parent tag. if (parser.opt.xmlns) { parser.ns = Object.create(rootNS) } // mostly just for error reporting parser.trackPosition = parser.opt.position !== false if (parser.trackPosition) { parser.position = parser.line = parser.column = 0 } emit(parser, 'onready') } if (!Object.create) { Object.create = function (o) { function F () {} F.prototype = o var newf = new F() return newf } } if (!Object.keys) { Object.keys = function (o) { var a = [] for (var i in o) if (o.hasOwnProperty(i)) a.push(i) return a } } function checkBufferLength (parser) { var maxAllowed = Math.max(sax.MAX_BUFFER_LENGTH, 10) var maxActual = 0 for (var i = 0, l = buffers.length; i < l; i++) { var len = parser[buffers[i]].length if (len > maxAllowed) { // Text/cdata nodes can get big, and since they're buffered, // we can get here under normal conditions. // Avoid issues by emitting the text node now, // so at least it won't get any bigger. switch (buffers[i]) { case 'textNode': closeText(parser) break case 'cdata': emitNode(parser, 'oncdata', parser.cdata) parser.cdata = '' break case 'script': emitNode(parser, 'onscript', parser.script) parser.script = '' break default: error(parser, 'Max buffer length exceeded: ' + buffers[i]) } } maxActual = Math.max(maxActual, len) } // schedule the next check for the earliest possible buffer overrun. var m = sax.MAX_BUFFER_LENGTH - maxActual parser.bufferCheckPosition = m + parser.position } function clearBuffers (parser) { for (var i = 0, l = buffers.length; i < l; i++) { parser[buffers[i]] = '' } } function flushBuffers (parser) { closeText(parser) if (parser.cdata !== '') { emitNode(parser, 'oncdata', parser.cdata) parser.cdata = '' } if (parser.script !== '') { emitNode(parser, 'onscript', parser.script) parser.script = '' } } SAXParser.prototype = { end: function () { end(this) }, write: write, resume: function () { this.error = null; return this }, close: function () { return this.write(null) }, flush: function () { flushBuffers(this) } } var Stream try { Stream = require('stream').Stream } catch (ex) { Stream = function () {} } var streamWraps = sax.EVENTS.filter(function (ev) { return ev !== 'error' && ev !== 'end' }) function createStream (strict, opt) { return new SAXStream(strict, opt) } function SAXStream (strict, opt) { if (!(this instanceof SAXStream)) { return new SAXStream(strict, opt) } Stream.apply(this) this._parser = new SAXParser(strict, opt) this.writable = true this.readable = true var me = this this._parser.onend = function () { me.emit('end') } this._parser.onerror = function (er) { me.emit('error', er) // if didn't throw, then means error was handled. // go ahead and clear error, so we can write again. me._parser.error = null } this._decoder = null streamWraps.forEach(function (ev) { Object.defineProperty(me, 'on' + ev, { get: function () { return me._parser['on' + ev] }, set: function (h) { if (!h) { me.removeAllListeners(ev) me._parser['on' + ev] = h return h } me.on(ev, h) }, enumerable: true, configurable: false }) }) } SAXStream.prototype = Object.create(Stream.prototype, { constructor: { value: SAXStream } }) SAXStream.prototype.write = function (data) { if (typeof Buffer === 'function' && typeof Buffer.isBuffer === 'function' && Buffer.isBuffer(data)) { if (!this._decoder) { var SD = require('string_decoder').StringDecoder this._decoder = new SD('utf8') } data = this._decoder.write(data) } this._parser.write(data.toString()) this.emit('data', data) return true } SAXStream.prototype.end = function (chunk) { if (chunk && chunk.length) { this.write(chunk) } this._parser.end() return true } SAXStream.prototype.on = function (ev, handler) { var me = this if (!me._parser['on' + ev] && streamWraps.indexOf(ev) !== -1) { me._parser['on' + ev] = function () { var args = arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments) args.splice(0, 0, ev) me.emit.apply(me, args) } } return Stream.prototype.on.call(me, ev, handler) } // character classes and tokens var whitespace = '\r\n\t ' // this really needs to be replaced with character classes. // XML allows all manner of ridiculous numbers and digits. // (Letter | "_" | ":") var quote = '\'"' var attribEnd = whitespace + '>' var CDATA = '[CDATA[' var DOCTYPE = 'DOCTYPE' var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/' var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE } // turn all the string character sets into character class objects. whitespace = charClass(whitespace) // http://www.w3.org/TR/REC-xml/#NT-NameStartChar // This implementation works on strings, a single character at a time // as such, it cannot ever support astral-plane characters (10000-EFFFF) // without a significant breaking change to either this parser, or the // JavaScript language. Implementation of an emoji-capable xml parser // is left as an exercise for the reader. var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/ var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/ var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040.\d-]/ quote = charClass(quote) attribEnd = charClass(attribEnd) function charClass (str) { return str.split('').reduce(function (s, c) { s[c] = true return s }, {}) } function isMatch (regex, c) { return regex.test(c) } function is (charclass, c) { return charclass[c] } function notMatch (regex, c) { return !isMatch(regex, c) } function not (charclass, c) { return !is(charclass, c) } var S = 0 sax.STATE = { BEGIN: S++, // leading byte order mark or whitespace BEGIN_WHITESPACE: S++, // leading whitespace TEXT: S++, // general stuff TEXT_ENTITY: S++, // & and such. OPEN_WAKA: S++, // < SGML_DECL: S++, // SCRIPT: S++, //