diff options
Diffstat (limited to 'node_modules/htmlparser2/lib/Parser.js')
-rw-r--r-- | node_modules/htmlparser2/lib/Parser.js | 314 |
1 files changed, 314 insertions, 0 deletions
var Tokenizer = require("./Tokenizer.js");

/*
    Options:

    xmlMode: treat the input as XML. When set, this file skips its HTML-only
        behaviour (implied end tags, void elements, default lower-casing of
        names). Off by default.
        NOTE(review): the tokenizer presumably also drops its special
        script/style handling in this mode - confirm in Tokenizer.js.
    lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
    lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
*/

/*
    Callbacks (all optional, looked up on the `cbs` object):

    onattribute,
    oncdataend,
    oncdatastart,
    onclosetag,
    oncomment,
    oncommentend,
    onend,
    onerror,
    onopentag,
    onopentagname,
    onprocessinginstruction,
    onreset,
    ontext
*/

// All lookup tables below are prototype-less (`__proto__: null`) so that the
// `name in table` checks used by the parser only ever match the keys listed
// here and never an inherited Object.prototype member such as "constructor".

// Form-related elements that are implicitly closed when another form control opens.
var formTags = {
    __proto__: null,
    input: true,
    option: true,
    optgroup: true,
    select: true,
    button: true,
    datalist: true,
    textarea: true
};

// Maps an opening tag name to the set of currently open elements it
// implicitly closes (HTML mode only).
var openImpliesClose = {
    __proto__: null,
    tr      : { __proto__: null, tr: true, th: true, td: true },
    th      : { __proto__: null, th: true },
    td      : { __proto__: null, thead: true, td: true },
    body    : { __proto__: null, head: true, link: true, script: true },
    li      : { __proto__: null, li: true },
    p       : { __proto__: null, p: true },
    select  : formTags,
    input   : formTags,
    output  : formTags,
    button  : formTags,
    datalist: formTags,
    textarea: formTags,
    option  : { __proto__: null, option: true },
    optgroup: { __proto__: null, optgroup: true }
};

// Elements that never have content or an end tag in HTML mode.
var voidElements = {
    __proto__: null,
    area: true,
    base: true,
    basefont: true,
    br: true,
    col: true,
    command: true,
    embed: true,
    frame: true,
    hr: true,
    img: true,
    input: true,
    isindex: true,
    keygen: true,
    link: true,
    meta: true,
    param: true,
    source: true,
    track: true,
    wbr: true
};

// First whitespace or "/" marks the end of a declaration / instruction name.
var re_nameEnd = /\s|\//;

/*
    Creates a parser that receives tokenizer events and forwards
    higher-level events to the handler object.

    cbs:     object holding the optional callbacks listed above (defaults to {})
    options: object holding the options listed above (defaults to {})
*/
function Parser(cbs, options){
    this._options = options || {};
    this._cbs = cbs || {};

    this._tagname = "";
    this._attribname = "";
    this._attribvalue = "";
    this._attribs = null;   // attribute map of the tag being opened, or null
    this._stack = [];       // names of the currently open elements
    this._done = false;

    this.startIndex = 0;
    this.endIndex = null;   // null until the first token has been positioned

    // Hand over the normalised options object so the tokenizer never
    // receives `undefined` when the caller omitted `options`.
    this._tokenizer = new Tokenizer(this._options, this);
}

require("util").inherits(Parser, require("events").EventEmitter);
// Decides whether names governed by `optionName` ("lowerCaseTags" or
// "lowerCaseAttributeNames") must be lower-cased: an explicitly supplied
// option wins, otherwise names are lower-cased unless xmlMode is on.
function shouldLowerCase(options, optionName){
    if(optionName in options) return !!options[optionName];
    return !options.xmlMode;
}

// Shared implementation of ondeclaration / onprocessinginstruction:
// extracts the name (text up to the first whitespace or "/"), applies the
// tag lower-casing rule and reports both name and value with `prefix` prepended.
function emitInstruction(parser, prefix, value){
    if(!parser._cbs.onprocessinginstruction) return;

    var idx = value.search(re_nameEnd),
        name = idx < 0 ? value : value.substr(0, idx);

    if(shouldLowerCase(parser._options, "lowerCaseTags")){
        name = name.toLowerCase();
    }
    parser._cbs.onprocessinginstruction(prefix + name, prefix + value);
}

// Recomputes startIndex/endIndex for the token that was just emitted.
// `initialOffset` is only used for the very first token (endIndex still null):
// it is subtracted from the tokenizer's section start, clamped at 0.
Parser.prototype._updatePosition = function(initialOffset){
    if(this.endIndex === null){
        var sectionStart = this._tokenizer._sectionStart;
        this.startIndex = sectionStart <= initialOffset ? 0 : sectionStart - initialOffset;
    } else {
        // Every later token starts right after the previous one ended.
        // (Previously this assignment was unconditional and clobbered the
        // first-token value computed above.)
        this.startIndex = this.endIndex + 1;
    }
    this.endIndex = this._tokenizer._index;
};

//Tokenizer event handlers
Parser.prototype.ontext = function(data){
    this._updatePosition(1);
    this.endIndex--;

    if(this._cbs.ontext) this._cbs.ontext(data);
};

// Start of an opening tag: closes elements the new tag implies closed,
// records the tag on the stack (unless it is a void element in HTML mode)
// and begins collecting attributes if an onopentag handler exists.
Parser.prototype.onopentagname = function(name){
    if(shouldLowerCase(this._options, "lowerCaseTags")){
        name = name.toLowerCase();
    }

    this._tagname = name;

    if(!this._options.xmlMode && name in openImpliesClose){
        var implied = openImpliesClose[name],
            top;
        // Keep closing the innermost open element while the new tag implies its end.
        while((top = this._stack[this._stack.length - 1]) in implied){
            this.onclosetag(top);
        }
    }

    if(this._options.xmlMode || !(name in voidElements)){
        this._stack.push(name);
    }

    if(this._cbs.onopentagname) this._cbs.onopentagname(name);
    if(this._cbs.onopentag) this._attribs = {};
};

// End of an opening tag: reports the tag with its attributes and, in HTML
// mode, immediately reports the close of a void element.
Parser.prototype.onopentagend = function(){
    this._updatePosition(1);

    if(this._attribs){
        if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
        this._attribs = null;
    }

    if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
        this._cbs.onclosetag(this._tagname);
    }

    this._tagname = "";
};

// Closing tag: pops (and reports) every element down to the matching open
// one. Unmatched </p> and </br> are turned into an empty element in HTML mode.
Parser.prototype.onclosetag = function(name){
    this._updatePosition(1);

    if(shouldLowerCase(this._options, "lowerCaseTags")){
        name = name.toLowerCase();
    }

    if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
        var pos = this._stack.lastIndexOf(name);
        if(pos !== -1){
            if(this._cbs.onclosetag){
                // number of elements to pop, innermost first
                pos = this._stack.length - pos;
                while(pos--) this._cbs.onclosetag(this._stack.pop());
            }
            else this._stack.length = pos;
        } else if(name === "p" && !this._options.xmlMode){
            this.onopentagname(name);
            this._closeCurrentTag();
        }
    } else if(!this._options.xmlMode && (name === "br" || name === "p")){
        this.onopentagname(name);
        this._closeCurrentTag();
    }
};

// "/>" only closes the element in xmlMode; in HTML mode it is a normal tag end.
Parser.prototype.onselfclosingtag = function(){
    if(this._options.xmlMode){
        this._closeCurrentTag();
    } else {
        this.onopentagend();
    }
};

// Finishes the tag currently being opened and closes it right away.
Parser.prototype._closeCurrentTag = function(){
    var name = this._tagname;

    this.onopentagend();

    //self-closing tags will be on the top of the stack
    //(cheaper check than in onclosetag)
    if(this._stack[this._stack.length - 1] === name){
        if(this._cbs.onclosetag){
            this._cbs.onclosetag(name);
        }
        this._stack.pop();
    }
};

Parser.prototype.onattribname = function(name){
    if(shouldLowerCase(this._options, "lowerCaseAttributeNames")){
        name = name.toLowerCase();
    }
    this._attribname = name;
};

// Attribute values may arrive in several pieces; accumulate them.
Parser.prototype.onattribdata = function(value){
    this._attribvalue += value;
};

// Reports the finished attribute; in the attribute map the first
// occurrence of a name wins.
Parser.prototype.onattribend = function(){
    if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
    if(
        this._attribs &&
        !Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
    ){
        this._attribs[this._attribname] = this._attribvalue;
    }
    this._attribname = "";
    this._attribvalue = "";
};

// <!...> declarations are reported through onprocessinginstruction with a "!" prefix.
Parser.prototype.ondeclaration = function(value){
    emitInstruction(this, "!", value);
};

// <?...> instructions are reported through onprocessinginstruction with a "?" prefix.
Parser.prototype.onprocessinginstruction = function(value){
    emitInstruction(this, "?", value);
};

Parser.prototype.oncomment = function(value){
    this._updatePosition(4);

    if(this._cbs.oncomment) this._cbs.oncomment(value);
    if(this._cbs.oncommentend) this._cbs.oncommentend();
};

// CDATA is reported as text wrapped in cdatastart/cdataend in xmlMode,
// and as a comment in HTML mode.
Parser.prototype.oncdata = function(value){
    this._updatePosition(1);

    if(this._options.xmlMode){
        if(this._cbs.oncdatastart) this._cbs.oncdatastart();
        if(this._cbs.ontext) this._cbs.ontext(value);
        if(this._cbs.oncdataend) this._cbs.oncdataend();
    } else {
        this.oncomment("[CDATA[" + value + "]]");
    }
};

Parser.prototype.onerror = function(err){
    if(this._cbs.onerror) this._cbs.onerror(err);
};

// End of input: reports a close for every element still open
// (innermost first), then signals onend.
Parser.prototype.onend = function(){
    if(this._cbs.onclosetag){
        for(var i = this._stack.length; i > 0; ){
            this._cbs.onclosetag(this._stack[--i]);
        }
    }
    if(this._cbs.onend) this._cbs.onend();
};


//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function(){
    if(this._cbs.onreset) this._cbs.onreset();
    this._tokenizer.reset();

    this._tagname = "";
    this._attribname = "";
    this._attribvalue = "";
    this._attribs = null;
    this._stack = [];
    this._done = false;

    // Restore the constructor's position state so the next document is not
    // positioned relative to the previous one.
    // NOTE(review): assumes Tokenizer#reset() rewinds its own indices - confirm.
    this.startIndex = 0;
    this.endIndex = null;
};

//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data){
    this.reset();
    this.end(data);
};

// Feeds a chunk to the tokenizer. Writing after end() is reported through
// onerror but the chunk is still forwarded.
Parser.prototype.write = function(chunk){
    if(this._done) this.onerror(new Error(".write() after done!"));
    this._tokenizer.write(chunk);
};

// Feeds an optional final chunk and marks the parser as done.
Parser.prototype.end = function(chunk){
    if(this._done) this.onerror(new Error(".end() after done!"));
    this._tokenizer.end(chunk);
    this._done = true;
};

//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;

module.exports = Parser;