This commit is contained in:
Domen Kožar 2019-11-19 17:50:30 +01:00
parent cd5893b2c6
commit 70742d22d9
No known key found for this signature in database
GPG key ID: C2FFBCAFD2C24246
6774 changed files with 1602535 additions and 1 deletions

14
node_modules/parse5/lib/sax/dev_null_stream.js generated vendored Normal file
View file

@ -0,0 +1,14 @@
'use strict';
var WritableStream = require('stream').Writable,
util = require('util');
var DevNullStream = module.exports = function () {
WritableStream.call(this);
};
util.inherits(DevNullStream, WritableStream);
DevNullStream.prototype._write = function (chunk, encoding, cb) {
cb();
};

118
node_modules/parse5/lib/sax/index.js generated vendored Normal file
View file

@ -0,0 +1,118 @@
'use strict';
var TransformStream = require('stream').Transform,
DevNullStream = require('./dev_null_stream'),
inherits = require('util').inherits,
Tokenizer = require('../tokenizer'),
LocationInfoTokenizerMixin = require('../extensions/location_info/tokenizer_mixin'),
ParserFeedbackSimulator = require('./parser_feedback_simulator'),
mergeOptions = require('../utils/merge_options');
var DEFAULT_OPTIONS = {
locationInfo: false
};
var SAXParser = module.exports = function (options) {
TransformStream.call(this);
this.options = mergeOptions(DEFAULT_OPTIONS, options);
this.tokenizer = new Tokenizer(options);
if (this.options.locationInfo)
new LocationInfoTokenizerMixin(this.tokenizer);
this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.tokenizer);
this.pendingText = null;
this.currentTokenLocation = void 0;
this.lastChunkWritten = false;
this.stopped = false;
// NOTE: always pipe stream to the /dev/null stream to avoid
// `highWaterMark` hit even if we don't have consumers.
// (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774)
this.pipe(new DevNullStream());
};
inherits(SAXParser, TransformStream);
//TransformStream implementation
SAXParser.prototype._transform = function (chunk, encoding, callback) {
if (!this.stopped) {
this.tokenizer.write(chunk.toString('utf8'), this.lastChunkWritten);
this._runParsingLoop();
}
this.push(chunk);
callback();
};
SAXParser.prototype._flush = function (callback) {
callback();
};
SAXParser.prototype.end = function (chunk, encoding, callback) {
this.lastChunkWritten = true;
TransformStream.prototype.end.call(this, chunk, encoding, callback);
};
SAXParser.prototype.stop = function () {
this.stopped = true;
};
//Internals
SAXParser.prototype._runParsingLoop = function () {
do {
var token = this.parserFeedbackSimulator.getNextToken();
if (token.type === Tokenizer.HIBERNATION_TOKEN)
break;
if (token.type === Tokenizer.CHARACTER_TOKEN ||
token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN ||
token.type === Tokenizer.NULL_CHARACTER_TOKEN) {
if (this.options.locationInfo) {
if (this.pendingText === null)
this.currentTokenLocation = token.location;
else
this.currentTokenLocation.endOffset = token.location.endOffset;
}
this.pendingText = (this.pendingText || '') + token.chars;
}
else {
this._emitPendingText();
this._handleToken(token);
}
} while (!this.stopped && token.type !== Tokenizer.EOF_TOKEN);
};
SAXParser.prototype._handleToken = function (token) {
if (this.options.locationInfo)
this.currentTokenLocation = token.location;
if (token.type === Tokenizer.START_TAG_TOKEN)
this.emit('startTag', token.tagName, token.attrs, token.selfClosing, this.currentTokenLocation);
else if (token.type === Tokenizer.END_TAG_TOKEN)
this.emit('endTag', token.tagName, this.currentTokenLocation);
else if (token.type === Tokenizer.COMMENT_TOKEN)
this.emit('comment', token.data, this.currentTokenLocation);
else if (token.type === Tokenizer.DOCTYPE_TOKEN)
this.emit('doctype', token.name, token.publicId, token.systemId, this.currentTokenLocation);
};
SAXParser.prototype._emitPendingText = function () {
if (this.pendingText !== null) {
this.emit('text', this.pendingText, this.currentTokenLocation);
this.pendingText = null;
}
};

View file

@ -0,0 +1,153 @@
'use strict';
var Tokenizer = require('../tokenizer'),
foreignContent = require('../common/foreign_content'),
UNICODE = require('../common/unicode'),
HTML = require('../common/html');
//Aliases
var $ = HTML.TAG_NAMES,
NS = HTML.NAMESPACES;
//ParserFeedbackSimulator
//Simulates adjustment of the Tokenizer which performed by standard parser during tree construction.
var ParserFeedbackSimulator = module.exports = function (tokenizer) {
this.tokenizer = tokenizer;
this.namespaceStack = [];
this.namespaceStackTop = -1;
this._enterNamespace(NS.HTML);
};
ParserFeedbackSimulator.prototype.getNextToken = function () {
var token = this.tokenizer.getNextToken();
if (token.type === Tokenizer.START_TAG_TOKEN)
this._handleStartTagToken(token);
else if (token.type === Tokenizer.END_TAG_TOKEN)
this._handleEndTagToken(token);
else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
token.type = Tokenizer.CHARACTER_TOKEN;
token.chars = UNICODE.REPLACEMENT_CHARACTER;
}
else if (this.skipNextNewLine) {
if (token.type !== Tokenizer.HIBERNATION_TOKEN)
this.skipNextNewLine = false;
if (token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
if (token.chars.length === 1)
return this.getNextToken();
token.chars = token.chars.substr(1);
}
}
return token;
};
//Namespace stack mutations
ParserFeedbackSimulator.prototype._enterNamespace = function (namespace) {
this.namespaceStackTop++;
this.namespaceStack.push(namespace);
this.inForeignContent = namespace !== NS.HTML;
this.currentNamespace = namespace;
this.tokenizer.allowCDATA = this.inForeignContent;
};
ParserFeedbackSimulator.prototype._leaveCurrentNamespace = function () {
this.namespaceStackTop--;
this.namespaceStack.pop();
this.currentNamespace = this.namespaceStack[this.namespaceStackTop];
this.inForeignContent = this.currentNamespace !== NS.HTML;
this.tokenizer.allowCDATA = this.inForeignContent;
};
//Token handlers
ParserFeedbackSimulator.prototype._ensureTokenizerMode = function (tn) {
if (tn === $.TEXTAREA || tn === $.TITLE)
this.tokenizer.state = Tokenizer.MODE.RCDATA;
else if (tn === $.PLAINTEXT)
this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
else if (tn === $.SCRIPT)
this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
else if (tn === $.STYLE || tn === $.IFRAME || tn === $.XMP ||
tn === $.NOEMBED || tn === $.NOFRAMES || tn === $.NOSCRIPT)
this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
};
ParserFeedbackSimulator.prototype._handleStartTagToken = function (token) {
var tn = token.tagName;
if (tn === $.SVG)
this._enterNamespace(NS.SVG);
else if (tn === $.MATH)
this._enterNamespace(NS.MATHML);
if (this.inForeignContent) {
if (foreignContent.causesExit(token)) {
this._leaveCurrentNamespace();
return;
}
var currentNs = this.currentNamespace;
if (currentNs === NS.MATHML)
foreignContent.adjustTokenMathMLAttrs(token);
else if (currentNs === NS.SVG) {
foreignContent.adjustTokenSVGTagName(token);
foreignContent.adjustTokenSVGAttrs(token);
}
foreignContent.adjustTokenXMLAttrs(token);
tn = token.tagName;
if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs))
this._enterNamespace(NS.HTML);
}
else {
if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING)
this.skipNextNewLine = true;
else if (tn === $.IMAGE)
token.tagName = $.IMG;
this._ensureTokenizerMode(tn);
}
};
ParserFeedbackSimulator.prototype._handleEndTagToken = function (token) {
var tn = token.tagName;
if (!this.inForeignContent) {
var previousNs = this.namespaceStack[this.namespaceStackTop - 1];
if (previousNs === NS.SVG && foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn])
tn = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP[tn];
//NOTE: check for exit from integration point
if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs))
this._leaveCurrentNamespace();
}
else if (tn === $.SVG && this.currentNamespace === NS.SVG ||
tn === $.MATH && this.currentNamespace === NS.MATHML)
this._leaveCurrentNamespace();
// NOTE: adjust end tag name as well for consistency
if (this.currentNamespace === NS.SVG)
foreignContent.adjustTokenSVGTagName(token);
};