wallet-core/node_modules/htmlparser2/lib/Parser.js
2017-05-03 15:35:00 +02:00

315 lines
7.3 KiB
JavaScript

var Tokenizer = require("./Tokenizer.js");
/*
Options:
xmlMode: Special behavior for script/style tags (true by default)
lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
*/
/*
Callbacks:
oncdataend,
oncdatastart,
onclosetag,
oncomment,
oncommentend,
onerror,
onopentag,
onprocessinginstruction,
onreset,
ontext
*/
var formTags = {
input: true,
option: true,
optgroup: true,
select: true,
button: true,
datalist: true,
textarea: true
};
var openImpliesClose = {
tr : { tr:true, th:true, td:true },
th : { th:true },
td : { thead:true, td:true },
body : { head:true, link:true, script:true },
li : { li:true },
p : { p:true },
select : formTags,
input : formTags,
output : formTags,
button : formTags,
datalist: formTags,
textarea: formTags,
option : { option:true },
optgroup: { optgroup:true }
};
var voidElements = {
__proto__: null,
area: true,
base: true,
basefont: true,
br: true,
col: true,
command: true,
embed: true,
frame: true,
hr: true,
img: true,
input: true,
isindex: true,
keygen: true,
link: true,
meta: true,
param: true,
source: true,
track: true,
wbr: true
};
var re_nameEnd = /\s|\//;
function Parser(cbs, options){
this._options = options || {};
this._cbs = cbs || {};
this._tagname = "";
this._attribname = "";
this._attribvalue = "";
this._attribs = null;
this._stack = [];
this._done = false;
this.startIndex = 0;
this.endIndex = null;
this._tokenizer = new Tokenizer(options, this);
}
require("util").inherits(Parser, require("events").EventEmitter);
Parser.prototype._updatePosition = function(initialOffset){
if(this.endIndex === null){
this.startIndex = this._tokenizer._sectionStart <= initialOffset ? 0 : this._tokenizer._sectionStart - initialOffset;
}
this.startIndex = this.endIndex + 1;
this.endIndex = this._tokenizer._index;
};
//Tokenizer event handlers
Parser.prototype.ontext = function(data){
this._updatePosition(1);
this.endIndex--;
if(this._cbs.ontext) this._cbs.ontext(data);
};
Parser.prototype.onopentagname = function(name){
if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
name = name.toLowerCase();
}
this._tagname = name;
if (!this._options.xmlMode && name in openImpliesClose) {
for(
var el;
(el = this._stack[this._stack.length-1]) in openImpliesClose[name];
this.onclosetag(el)
);
}
if(this._options.xmlMode || !(name in voidElements)){
this._stack.push(name);
}
if(this._cbs.onopentagname) this._cbs.onopentagname(name);
if(this._cbs.onopentag) this._attribs = {};
};
Parser.prototype.onopentagend = function(){
this._updatePosition(1);
if(this._attribs){
if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
this._attribs = null;
}
if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
this._cbs.onclosetag(this._tagname);
}
this._tagname = "";
};
Parser.prototype.onclosetag = function(name){
this._updatePosition(1);
if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
name = name.toLowerCase();
}
if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
var pos = this._stack.lastIndexOf(name);
if(pos !== -1){
if(this._cbs.onclosetag){
pos = this._stack.length - pos;
while(pos--) this._cbs.onclosetag(this._stack.pop());
}
else this._stack.length = pos;
} else if(name === "p" && !this._options.xmlMode){
this.onopentagname(name);
this._closeCurrentTag();
}
} else if(!this._options.xmlMode && (name === "br" || name === "p")){
this.onopentagname(name);
this._closeCurrentTag();
}
};
Parser.prototype.onselfclosingtag = function(){
if(this._options.xmlMode){
this._closeCurrentTag();
} else {
this.onopentagend();
}
};
Parser.prototype._closeCurrentTag = function(){
var name = this._tagname;
this.onopentagend();
//self-closing tags will be on the top of the stack
//(cheaper check than in onclosetag)
if(this._stack[this._stack.length-1] === name){
if(this._cbs.onclosetag){
this._cbs.onclosetag(name);
}
this._stack.pop();
}
};
Parser.prototype.onattribname = function(name){
if(!(this._options.xmlMode || "lowerCaseAttributeNames" in this._options) || this._options.lowerCaseAttributeNames){
name = name.toLowerCase();
}
this._attribname = name;
};
Parser.prototype.onattribdata = function(value){
this._attribvalue += value;
};
Parser.prototype.onattribend = function(){
if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
if(
this._attribs &&
!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
){
this._attribs[this._attribname] = this._attribvalue;
}
this._attribname = "";
this._attribvalue = "";
};
Parser.prototype.ondeclaration = function(value){
if(this._cbs.onprocessinginstruction){
var idx = value.search(re_nameEnd),
name = idx < 0 ? value : value.substr(0, idx);
if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
name = name.toLowerCase();
}
this._cbs.onprocessinginstruction("!" + name, "!" + value);
}
};
Parser.prototype.onprocessinginstruction = function(value){
if(this._cbs.onprocessinginstruction){
var idx = value.search(re_nameEnd),
name = idx < 0 ? value : value.substr(0, idx);
if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
name = name.toLowerCase();
}
this._cbs.onprocessinginstruction("?" + name, "?" + value);
}
};
Parser.prototype.oncomment = function(value){
this._updatePosition(4);
if(this._cbs.oncomment) this._cbs.oncomment(value);
if(this._cbs.oncommentend) this._cbs.oncommentend();
};
Parser.prototype.oncdata = function(value){
this._updatePosition(1);
if(this._options.xmlMode){
if(this._cbs.oncdatastart) this._cbs.oncdatastart();
if(this._cbs.ontext) this._cbs.ontext(value);
if(this._cbs.oncdataend) this._cbs.oncdataend();
} else {
this.oncomment("[CDATA[" + value + "]]");
}
};
Parser.prototype.onerror = function(err){
if(this._cbs.onerror) this._cbs.onerror(err);
};
Parser.prototype.onend = function(){
if(this._cbs.onclosetag){
for(
var i = this._stack.length;
i > 0;
this._cbs.onclosetag(this._stack[--i])
);
}
if(this._cbs.onend) this._cbs.onend();
};
//Resets the parser to a blank state, ready to parse a new HTML document
Parser.prototype.reset = function(){
if(this._cbs.onreset) this._cbs.onreset();
this._tokenizer.reset();
this._tagname = "";
this._attribname = "";
this._attribs = null;
this._stack = [];
this._done = false;
};
//Parses a complete HTML document and pushes it to the handler
Parser.prototype.parseComplete = function(data){
this.reset();
this.end(data);
};
Parser.prototype.write = function(chunk){
if(this._done) this.onerror(Error(".write() after done!"));
this._tokenizer.write(chunk);
};
Parser.prototype.end = function(chunk){
if(this._done) this.onerror(Error(".end() after done!"));
this._tokenizer.end(chunk);
this._done = true;
};
//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;
module.exports = Parser;