Files: d2f2697f296dd39aed6a8b63c6d04a736a7db5b3 / node_modules / string_decoder / lib / string_decoder.js
9465 bytesRaw
1 | // Copyright Joyent, Inc. and other Node contributors. |
2 | // |
3 | // Permission is hereby granted, free of charge, to any person obtaining a |
4 | // copy of this software and associated documentation files (the |
5 | // "Software"), to deal in the Software without restriction, including |
6 | // without limitation the rights to use, copy, modify, merge, publish, |
7 | // distribute, sublicense, and/or sell copies of the Software, and to permit |
8 | // persons to whom the Software is furnished to do so, subject to the |
9 | // following conditions: |
10 | // |
11 | // The above copyright notice and this permission notice shall be included |
12 | // in all copies or substantial portions of the Software. |
13 | // |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
15 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
16 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
17 | // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
18 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
19 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
20 | // USE OR OTHER DEALINGS IN THE SOFTWARE. |
21 | |
22 | 'use strict'; |
23 | |
24 | /*<replacement>*/ |
25 | |
26 | var Buffer = require('safe-buffer').Buffer; |
27 | /*</replacement>*/ |
28 | |
// Fallback used only when the Buffer implementation provides no `isEncoding`.
// The argument is coerced to a string and compared case-insensitively against
// the names below. 'latin1' is listed because _normalizeEncoding() in this
// file accepts it too.
function isEncodingFallback(encoding) {
  encoding = '' + encoding;
  switch (encoding && encoding.toLowerCase()) {
    case 'hex':
    case 'utf8':
    case 'utf-8':
    case 'ascii':
    case 'latin1':
    case 'binary':
    case 'base64':
    case 'ucs2':
    case 'ucs-2':
    case 'utf16le':
    case 'utf-16le':
    case 'raw':
      return true;
    default:
      return false;
  }
}

// Snapshot of `Buffer.isEncoding` taken at load time (or the fallback above).
var isEncoding = Buffer.isEncoding || isEncodingFallback;
38 | |
// Maps an encoding name to its canonical spelling: 'utf8', 'utf16le',
// 'latin1', 'base64', 'ascii' or 'hex'. A falsy `enc` means 'utf8'. The exact
// name is tried first; on a miss the value is stringified, lower-cased and
// tried once more. Returns undefined when the name is still not recognised.
function _normalizeEncoding(enc) {
  if (!enc) return 'utf8';
  var retried;
  while (true) {
    switch (enc) {
      case 'utf8':
      case 'utf-8':
        return 'utf8';
      case 'ucs2':
      case 'ucs-2':
      case 'utf16le':
      case 'utf-16le':
        return 'utf16le';
      case 'latin1':
      case 'binary':
        return 'latin1';
      case 'base64':
      case 'ascii':
      case 'hex':
        return enc;
      default:
        if (retried) return; // undefined
        // Second and last pass: compare the lower-cased string form.
        enc = ('' + enc).toLowerCase();
        retried = true;
    }
  }
}
66 | |
// Resolves `enc` to the name stored on the decoder, or throws
// `Error('Unknown encoding: ' + enc)`.
//
// Do not cache `Buffer.isEncoding` when checking encoding names as some
// modules monkey-patch it to support additional encodings
function normalizeEncoding(enc) {
  var nenc = _normalizeEncoding(enc);
  if (typeof nenc !== 'string') {
    // `isEncoding` is the load-time snapshot. A name that _normalizeEncoding
    // does not know is accepted only when `Buffer.isEncoding` differs from
    // that snapshot and the *current* check recognises it.
    var patched = Buffer.isEncoding !== isEncoding;
    var accepted = patched && (typeof Buffer.isEncoding === 'function' ? Buffer.isEncoding(enc) : isEncoding(enc));
    if (!accepted) throw new Error('Unknown encoding: ' + enc);
  }
  return nenc || enc;
}
74 | |
// StringDecoder provides an interface for efficiently splitting a series of
// buffers into a series of JS strings without breaking apart multi-byte
// characters.
exports.StringDecoder = StringDecoder;
// Creates a decoder for `encoding`; the name goes through normalizeEncoding()
// and the result is stored in `this.encoding`.
//
// utf16le, utf8 and base64 get per-instance handlers plus the state used to
// carry a character across writes: `lastChar` (scratch buffer of `nb` bytes),
// `lastNeed` and `lastTotal`. Every other encoding is wired to
// simpleWrite/simpleEnd and returns before that state is created.
function StringDecoder(encoding) {
  this.encoding = normalizeEncoding(encoding);
  var nb; // size in bytes of the `lastChar` scratch buffer
  switch (this.encoding) {
    case 'utf16le':
      this.text = utf16Text;
      this.end = utf16End;
      nb = 4;
      break;
    case 'utf8':
      // `text` and `end` come from the prototype for UTF-8.
      this.fillLast = utf8FillLast;
      nb = 4;
      break;
    case 'base64':
      this.text = base64Text;
      this.end = base64End;
      nb = 3;
      break;
    default:
      this.write = simpleWrite;
      this.end = simpleEnd;
      return;
  }
  this.lastNeed = 0;
  this.lastTotal = 0;
  this.lastChar = Buffer.allocUnsafe(nb);
}
106 | |
// Decodes `buf` and returns the resulting string (possibly ''). When a
// previous write left a character pending (`lastNeed` non-zero), `fillLast`
// is asked to finish it first; the rest of `buf` then goes through
// `this.text`.
StringDecoder.prototype.write = function (buf) {
  if (buf.length === 0) return '';
  var r;
  var i;
  if (this.lastNeed) {
    r = this.fillLast(buf);
    // undefined: `buf` was consumed without completing the pending character.
    if (r === undefined) return '';
    // Skip the bytes of `buf` that belonged to the pending character.
    i = this.lastNeed;
    this.lastNeed = 0;
  } else {
    i = 0;
  }
  if (i < buf.length) return r ? r + this.text(buf, i) : this.text(buf, i);
  return r || '';
};
122 | |
// Prototype default for `end`; the constructor overrides it per instance for
// every encoding except UTF-8.
StringDecoder.prototype.end = utf8End;

// Returns only complete characters in a Buffer
StringDecoder.prototype.text = utf8Text;
127 | |
// Attempts to complete a partial non-UTF-8 character using bytes from a Buffer.
// When `buf` holds at least `lastNeed` bytes they are copied behind the bytes
// already in `lastChar` and the completed character is returned as a string.
// Otherwise all of `buf` is stored, `lastNeed` is reduced by its length and
// undefined is returned.
StringDecoder.prototype.fillLast = function (buf) {
  if (this.lastNeed <= buf.length) {
    buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  }
  buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, buf.length);
  this.lastNeed -= buf.length;
};
137 | |
// Checks the type of a UTF-8 byte, whether it's ASCII, a leading byte, or a
// continuation byte. Returns 0 for ASCII, the sequence length (2, 3 or 4) for
// a leading byte, -1 for a continuation byte, and -2 for any other byte.
function utf8CheckByte(byte) {
  if (byte <= 0x7F) return 0;
  if (byte >> 5 === 0x06) return 2; // 110xxxxx
  if (byte >> 4 === 0x0E) return 3; // 1110xxxx
  if (byte >> 3 === 0x1E) return 4; // 11110xxx
  return byte >> 6 === 0x02 ? -1 : -2; // 10xxxxxx is a continuation byte
}
144 | |
// Checks at most 3 bytes at the end of a Buffer in order to detect an
// incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4)
// needed to complete the UTF-8 character (if applicable) are returned.
//
// The scan walks backwards from the last byte and never looks before index
// `i`. When a leading byte is found, `self.lastNeed` is set to the number of
// bytes of that character that are not in `buf` yet. Returns 0 when nothing
// is pending.
function utf8CheckIncomplete(self, buf, i) {
  var j = buf.length - 1;
  if (j < i) return 0;
  // Last byte.
  var nb = utf8CheckByte(buf[j]);
  if (nb >= 0) {
    if (nb > 0) self.lastNeed = nb - 1;
    return nb;
  }
  // Keep scanning only while continuation bytes (-1) are seen.
  if (--j < i || nb === -2) return 0;
  // Second-to-last byte: one byte after it is already present.
  nb = utf8CheckByte(buf[j]);
  if (nb >= 0) {
    if (nb > 0) self.lastNeed = nb - 2;
    return nb;
  }
  if (--j < i || nb === -2) return 0;
  // Third-to-last byte: two bytes after it are already present.
  nb = utf8CheckByte(buf[j]);
  if (nb >= 0) {
    if (nb > 0) {
      // A 2-byte lead followed by two continuation bytes leaves nothing to
      // wait for, so 0 is reported and lastNeed is left untouched.
      if (nb === 2) nb = 0;else self.lastNeed = nb - 3;
    }
    return nb;
  }
  return 0;
}
172 | |
// Validates as many continuation bytes for a multi-byte UTF-8 character as
// needed or are available. If we see a non-continuation byte where we expect
// one, we "replace" the validated continuation bytes we've seen so far with
// a single UTF-8 replacement character ('\ufffd'), to match v8's UTF-8 decoding
// behavior. The continuation byte check is included three times in the case
// where all of the continuation bytes for a character exist in the same buffer.
// It is also done this way as a slight performance increase instead of using a
// loop.
//
// On a mismatch `self.lastNeed` is set to the index of the offending byte in
// `buf`. Returns undefined when every inspected byte is a continuation byte.
// `p` is accepted but not used.
function utf8CheckExtraBytes(self, buf, p) {
  if ((buf[0] & 0xC0) !== 0x80) {
    self.lastNeed = 0;
    return '\ufffd';
  }
  if (self.lastNeed > 1 && buf.length > 1) {
    if ((buf[1] & 0xC0) !== 0x80) {
      self.lastNeed = 1;
      return '\ufffd';
    }
    if (self.lastNeed > 2 && buf.length > 2) {
      if ((buf[2] & 0xC0) !== 0x80) {
        self.lastNeed = 2;
        return '\ufffd';
      }
    }
  }
}
199 | |
// Attempts to complete a multi-byte UTF-8 character using bytes from a Buffer.
// The incoming bytes are first validated by utf8CheckExtraBytes(); if that
// yields a string it is returned as is. Otherwise the bytes are appended to
// `lastChar`: the decoded character is returned once `lastNeed` bytes were
// available, else `lastNeed` is reduced and undefined is returned.
function utf8FillLast(buf) {
  var p = this.lastTotal - this.lastNeed; // bytes already held in lastChar
  var r = utf8CheckExtraBytes(this, buf, p);
  if (r !== undefined) return r;
  if (this.lastNeed <= buf.length) {
    buf.copy(this.lastChar, p, 0, this.lastNeed);
    return this.lastChar.toString(this.encoding, 0, this.lastTotal);
  }
  buf.copy(this.lastChar, p, 0, buf.length);
  this.lastNeed -= buf.length;
}
212 | |
// Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a
// partial character, the character's bytes are buffered until the required
// number of bytes are available.
// Decoding starts at index `i`; the buffered tail goes into `lastChar`.
function utf8Text(buf, i) {
  var total = utf8CheckIncomplete(this, buf, i);
  if (!this.lastNeed) return buf.toString('utf8', i);
  this.lastTotal = total;
  // `total - lastNeed` bytes of the partial character sit at the end of buf.
  var end = buf.length - (total - this.lastNeed);
  buf.copy(this.lastChar, 0, end);
  return buf.toString('utf8', i, end);
}
224 | |
// For UTF-8, a replacement character is added when ending on a partial
// character.
// An optional final `buf` is written first. The pending state is cleared
// afterwards so the decoder can be reused without the next write being
// treated as the continuation of the character that was just flushed.
function utf8End(buf) {
  var r = buf && buf.length ? this.write(buf) : '';
  if (this.lastNeed) {
    this.lastNeed = 0;
    return r + '\ufffd';
  }
  return r;
}
232 | |
// UTF-16LE typically needs two bytes per character, but even if we have an even
// number of bytes available, we need to check if we end on a leading/high
// surrogate. In that case, we need to wait for the next two bytes in order to
// decode the last character properly.
// Decoding starts at index `i`; held-back bytes are stored in `lastChar`.
function utf16Text(buf, i) {
  if ((buf.length - i) % 2 === 0) {
    var r = buf.toString('utf16le', i);
    if (r) {
      var c = r.charCodeAt(r.length - 1);
      if (c >= 0xD800 && c <= 0xDBFF) {
        // Ends on a high surrogate: hold its two bytes back and wait for the
        // two bytes that follow.
        this.lastNeed = 2;
        this.lastTotal = 4;
        this.lastChar[0] = buf[buf.length - 2];
        this.lastChar[1] = buf[buf.length - 1];
        return r.slice(0, -1);
      }
    }
    return r;
  }
  // Odd number of bytes: hold the dangling last byte back.
  this.lastNeed = 1;
  this.lastTotal = 2;
  this.lastChar[0] = buf[buf.length - 1];
  return buf.toString('utf16le', i, buf.length - 1);
}
257 | |
// For UTF-16LE we do not explicitly append special replacement characters if we
// end on a partial character, we simply let v8 handle that.
// An optional final `buf` is written first. The pending state is cleared
// after the held-back bytes are flushed so the decoder can be reused.
function utf16End(buf) {
  var r = buf && buf.length ? this.write(buf) : '';
  if (this.lastNeed) {
    var end = this.lastTotal - this.lastNeed; // bytes currently held in lastChar
    this.lastNeed = 0;
    return r + this.lastChar.toString('utf16le', 0, end);
  }
  return r;
}
268 | |
// Encodes the bytes of `buf` from index `i` as base64, but only in whole
// 3-byte groups. The 1 or 2 left-over bytes are stored in `lastChar` and
// `lastNeed` records how many more bytes complete the group.
function base64Text(buf, i) {
  var n = (buf.length - i) % 3;
  if (n === 0) return buf.toString('base64', i);
  this.lastNeed = 3 - n;
  this.lastTotal = 3;
  if (n === 1) {
    this.lastChar[0] = buf[buf.length - 1];
  } else {
    this.lastChar[0] = buf[buf.length - 2];
    this.lastChar[1] = buf[buf.length - 1];
  }
  return buf.toString('base64', i, buf.length - n);
}
282 | |
// Writes an optional final `buf`, then flushes the 1 or 2 bytes still held in
// `lastChar` as a padded base64 group. The pending state is cleared
// afterwards so the decoder can be reused.
function base64End(buf) {
  var r = buf && buf.length ? this.write(buf) : '';
  if (this.lastNeed) {
    // Encode before resetting: the slice length depends on lastNeed.
    var tail = this.lastChar.toString('base64', 0, 3 - this.lastNeed);
    this.lastNeed = 0;
    return r + tail;
  }
  return r;
}
288 | |
// Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex)
// Each chunk is decoded on its own with `this.encoding`; nothing is carried
// over between calls.
function simpleWrite(buf) {
  return buf.toString(this.encoding);
}
293 | |
// Returns the decoded form of an optional final `buf` via `this.write`, or ''
// when `buf` is missing or empty.
function simpleEnd(buf) {
  return buf && buf.length ? this.write(buf) : '';
}
Built with git-ssb-web