82f2b76e25
We now use webpack instead of SystemJS, effectively bundling modules into one file (plus commons chunks) for every entry point. This results in a much smaller extension size (almost half). Furthermore, we use yarn/npm even for the extension's run-time dependencies. This relieves us of manually vendoring and building dependencies. It's also easier to understand for new developers familiar with Node.
177 lines
4.8 KiB
JavaScript
177 lines
4.8 KiB
JavaScript
"use strict"
|
|
|
|
// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
|
|
|
|
// == UTF16-BE codec. ==========================================================
|
|
|
|
/**
 * Codec descriptor for big-endian UTF-16.
 *
 * The constructor keeps no state of its own. Everything a consumer needs is
 * exposed on the prototype: the encoder constructor, the decoder constructor
 * and the `bomAware` flag.
 */
function Utf16BECodec() {
}

// Published under the `utf16be` export name. Function declarations are
// hoisted, so declaring the constructor before or after this line is equivalent.
exports.utf16be = Utf16BECodec;

// Constructors used to create the streaming encoder / decoder for this codec.
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
Utf16BECodec.prototype.decoder = Utf16BEDecoder;

// NOTE(review): presumably read by the host library to decide whether BOM
// add/strip wrappers apply to this codec - confirm against the caller.
Utf16BECodec.prototype.bomAware = true;
|
|
|
|
|
|
// -- Encoding
|
|
|
|
/**
 * Streaming encoder that turns JavaScript strings into UTF-16BE bytes.
 * It keeps no state between calls, so every `write` is independent.
 */
function Utf16BEEncoder() {
}

/**
 * Encodes one chunk of text as big-endian UTF-16.
 *
 * @param {string} str - Text to encode.
 * @returns {Buffer} Two bytes per UTF-16 code unit, high byte first.
 */
Utf16BEEncoder.prototype.write = function(str) {
    // Buffer.from replaces the deprecated `new Buffer(string, encoding)`
    // constructor, which would silently allocate memory if handed a number.
    var buf = Buffer.from(str, 'ucs2');

    // 'ucs2' produces little-endian byte pairs; swap every pair in place
    // to obtain big-endian order.
    for (var i = 0; i < buf.length; i += 2) {
        var tmp = buf[i];
        buf[i] = buf[i+1];
        buf[i+1] = tmp;
    }
    return buf;
};

/**
 * Finishes the stream. Nothing is buffered between writes, so there is
 * nothing to flush.
 *
 * @returns {undefined}
 */
Utf16BEEncoder.prototype.end = function() {
};
|
|
|
|
|
|
// -- Decoding
|
|
|
|
/**
 * Streaming decoder for big-endian UTF-16.
 *
 * `overflowByte` holds the first byte of a code unit that was split across
 * two chunks, or -1 when no byte is pending.
 */
function Utf16BEDecoder() {
    this.overflowByte = -1;
}

/**
 * Decodes one chunk of UTF-16BE bytes.
 *
 * The bytes are copied pair-wise swapped into a scratch buffer (turning them
 * into little-endian order) and then decoded with the native 'ucs2' codec.
 *
 * @param {Buffer} buf - Next chunk of input bytes; may have odd length.
 * @returns {string} Text decoded from all complete byte pairs seen so far.
 */
Utf16BEDecoder.prototype.write = function(buf) {
    if (buf.length === 0)
        return '';

    // One extra byte of room for the pending byte carried over from the
    // previous chunk. Buffer.alloc replaces the deprecated `new Buffer(size)`.
    var swapped = Buffer.alloc(buf.length + 1),
        i = 0, j = 0;

    if (this.overflowByte !== -1) {
        // Complete the code unit that was split across chunks:
        // the pending byte is the high byte, buf[0] is the low byte.
        swapped[0] = buf[0];
        swapped[1] = this.overflowByte;
        i = 1; j = 2;
    }

    // Swap every remaining complete pair.
    for (; i < buf.length-1; i += 2, j += 2) {
        swapped[j] = buf[i+1];
        swapped[j+1] = buf[i];
    }

    // Exactly one byte left over -> keep it for the next chunk.
    this.overflowByte = (i === buf.length-1) ? buf[buf.length-1] : -1;

    // Only the first j bytes were filled in.
    return swapped.slice(0, j).toString('ucs2');
};

/**
 * Finishes the stream. Produces no output; a byte still pending in
 * `overflowByte` is not emitted.
 *
 * @returns {undefined}
 */
Utf16BEDecoder.prototype.end = function() {
};
|
|
|
|
|
|
// == UTF-16 codec =============================================================
|
|
// Decoder chooses automatically between UTF-16LE and UTF-16BE using the BOM and, when there is none, a heuristic that counts ASCII-range characters.
|
|
// Defaults to UTF-16LE, as it's prevalent and default in Node.
|
|
// http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
|
|
// Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
|
|
|
|
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
|
|
|
|
/**
 * Codec descriptor for the generic "utf16" encoding.
 *
 * @param {Object} codecOptions - Not used by this constructor.
 * @param {Object} iconv - Host library object; stored so the encoder and
 *     decoder can request the concrete UTF-16LE / UTF-16BE codecs from it.
 */
function Utf16Codec(codecOptions, iconv) {
    this.iconv = iconv;
}

// Published under the `utf16` export name. The function declaration is
// hoisted, so its position relative to this assignment does not matter.
exports.utf16 = Utf16Codec;

// Constructors used to create the streaming encoder / decoder for this codec.
Utf16Codec.prototype.encoder = Utf16Encoder;
Utf16Codec.prototype.decoder = Utf16Decoder;
|
|
|
|
|
|
// -- Encoding (pass-through)
|
|
|
|
/**
 * Encoder for the generic "utf16" encoding: a pass-through wrapper around
 * the UTF-16LE encoder obtained from the host library, with `addBOM`
 * defaulting to true.
 *
 * @param {Object} [options] - Encoder options. `addBOM` is set to true when
 *     the caller left it undefined; an explicit value is forwarded as given.
 * @param {Object} codec - Owning codec; `codec.iconv.getEncoder` supplies the
 *     underlying encoder.
 */
function Utf16Encoder(options, codec) {
    var encoderOptions = options || {};

    if (encoderOptions.addBOM === undefined) {
        // Apply the default on a copy. Writing it into the caller's object
        // would leak `addBOM: true` into any later call reusing that object.
        var copy = {};
        for (var key in encoderOptions)
            copy[key] = encoderOptions[key];
        copy.addBOM = true;
        encoderOptions = copy;
    }

    this.encoder = codec.iconv.getEncoder('utf-16le', encoderOptions);
}

/**
 * Encodes one chunk by delegating to the wrapped encoder.
 *
 * @param {string} str - Text to encode.
 * @returns {*} Whatever the wrapped encoder's `write` returns.
 */
Utf16Encoder.prototype.write = function(str) {
    return this.encoder.write(str);
};

/**
 * Finishes the stream by delegating to the wrapped encoder.
 *
 * @returns {*} Whatever the wrapped encoder's `end` returns.
 */
Utf16Encoder.prototype.end = function() {
    return this.encoder.end();
};
|
|
|
|
|
|
// -- Decoding
|
|
|
|
/**
 * Decoder for the generic "utf16" encoding. It buffers the first bytes of
 * the stream, lets `detectEncoding` choose an encoding from them, and from
 * then on forwards everything to the decoder obtained for that encoding.
 *
 * @param {Object} [options] - Decoder options; `defaultEncoding` is handed to
 *     `detectEncoding`, and the whole object is forwarded to the decoder
 *     that gets selected.
 * @param {Object} codec - Owning codec; `codec.iconv.getDecoder` supplies the
 *     concrete decoder.
 */
function Utf16Decoder(options, codec) {
    this.decoder = null;        // Concrete decoder, once one has been chosen.
    this.initialBytes = [];     // Chunks received before the choice was made.
    this.initialBytesLen = 0;   // Total byte length of those chunks.

    this.options = options || {};
    this.iconv = codec.iconv;
}

/**
 * Feeds one chunk into the decoder.
 *
 * @param {Buffer} buf - Next chunk of input bytes.
 * @returns {string} Decoded text; the empty string while fewer than 16 bytes
 *     have been collected and no decoder has been chosen yet.
 */
Utf16Decoder.prototype.write = function(buf) {
    // Fast path: the encoding has already been decided.
    if (this.decoder)
        return this.decoder.write(buf);

    // Still undecided: stash the chunk.
    this.initialBytes.push(buf);
    this.initialBytesLen += buf.length;

    // Wait until at least 16 bytes are available for detection.
    if (this.initialBytesLen < 16)
        return '';

    // Enough data: detect the encoding, then replay everything collected so far.
    var collected = Buffer.concat(this.initialBytes);
    var chosen = detectEncoding(collected, this.options.defaultEncoding);
    this.decoder = this.iconv.getDecoder(chosen, this.options);
    this.initialBytes.length = this.initialBytesLen = 0;

    return this.decoder.write(collected);
};

/**
 * Finishes the stream. If no decoder was chosen yet (fewer than 16 bytes
 * arrived), the encoding is detected from whatever was collected and that
 * data is decoded now.
 *
 * @returns {string|undefined} Remaining decoded text, or whatever the
 *     underlying decoder's `end` returns.
 */
Utf16Decoder.prototype.end = function() {
    if (this.decoder)
        return this.decoder.end();

    var collected = Buffer.concat(this.initialBytes);
    var chosen = detectEncoding(collected, this.options.defaultEncoding);
    this.decoder = this.iconv.getDecoder(chosen, this.options);

    var head = this.decoder.write(collected);
    var tail = this.decoder.end();

    if (tail)
        return head + tail;
    return head;
};
|
|
|
|
/**
 * Guesses whether `buf` holds UTF-16LE or UTF-16BE data.
 *
 * A byte-order mark decides immediately. Without one, the first 64 bytes (at
 * most) are scanned pair by pair: a pair whose first byte is zero and second
 * is not counts for big-endian, the mirror image counts for little-endian.
 * The side with more hits wins.
 *
 * @param {Buffer} buf - Initial bytes of the stream.
 * @param {string} [defaultEncoding] - Returned when the data is too short or
 *     the scan ends in a tie; falls back to 'utf-16le' when omitted.
 * @returns {string} 'utf-16be', 'utf-16le' or the default encoding.
 */
function detectEncoding(buf, defaultEncoding) {
    var fallback = defaultEncoding || 'utf-16le';

    // Fewer than two bytes: nothing to inspect.
    if (buf.length < 2)
        return fallback;

    // Byte-order mark takes precedence over any heuristic.
    if (buf[0] == 0xFE && buf[1] == 0xFF)
        return 'utf-16be';
    if (buf[0] == 0xFF && buf[1] == 0xFE)
        return 'utf-16le';

    // No BOM. Text usually contains ASCII characters (U+00**), whereas U+**00
    // is uncommon, so the position of zero bytes hints at the byte order.
    // balance > 0 favours big-endian, balance < 0 favours little-endian.
    var balance = 0;
    var scanLen = Math.min(buf.length - (buf.length % 2), 64); // Always even.

    for (var pos = 0; pos < scanLen; pos += 2) {
        var first = buf[pos], second = buf[pos+1];
        if (first === 0 && second !== 0)
            balance++;
        else if (first !== 0 && second === 0)
            balance--;
    }

    if (balance > 0)
        return 'utf-16be';
    if (balance < 0)
        return 'utf-16le';
    return fallback;
}
|
|
|
|
|