Files: f65379392f63738750a80ce19d6c9d6d454c86d3 / lib / pack.js
11347 bytesRaw
1 | var buffered = require('pull-buffered') |
2 | var pull = require('pull-stream') |
3 | var toPull = require('stream-to-pull-stream') |
4 | var pako = require('pako') |
5 | var createHash = require('./util').createHash |
6 | var cat = require('pull-cat') |
7 | |
// Public API of this module: `decode` is decodePack, `encode` is encodePack.
exports.decode = decodePack
exports.encode = encodePack

// Version number that encodePack writes into the pack header.
var PACK_VERSION = 2

// Object type names, indexed by the 3-bit type code that readTypedVarInt
// extracts from an object header with `(firstByte >> 4) & 7`.
var objectTypes = [
  'none', 'commit', 'tree', 'blob',
  'tag', 'unused', 'ofs-delta', 'ref-delta'
]
// Reverse lookup used by encodeTypedVarInt: type name -> numeric type code.
// 'none' and 'unused' have no entry, so encodeTypedVarInt reports them as a
// bad object type.
var objectTypeNums = {
  commit: 1,
  tree: 2,
  blob: 3,
  tag: 4,
  'ofs-delta': 6,
  'ref-delta': 7
}
25 | |
/**
 * Wrap `cb` so that it is always invoked with a truthy "end" value.
 *
 * The returned function forwards the error it receives; when it receives a
 * falsy value it forwards `true` instead, so `cb` always observes the stream
 * as terminated.
 *
 * @param {function} cb - callback to invoke with the end/error value
 * @returns {function} handler taking an optional error
 */
function error(cb) {
  return function onTerminated(err) {
    var endValue = err ? err : true
    cb(endValue)
  }
}
31 | |
/**
 * Through-stream that inflates a zlib stream fed to it one byte at a time.
 *
 * The source must yield chunks of at most one byte; feeding single bytes lets
 * the inflater stop exactly at the end of the compressed stream without
 * consuming bytes that belong to whatever follows it.
 *
 * @param {function} read - pull-stream source yielding chunks of length <= 1
 * @returns {function} pull-stream source yielding inflated Buffers
 */
function inflateBytes(read) {
  var inflate = new pako.Inflate()
  var ended
  // Output produced by the current push. pako may call onData more than once
  // per push, so chunks are collected rather than overwritten.
  var chunksOut = []

  inflate.onData = function (data) {
    chunksOut.push(Buffer.from(data))
  }

  inflate.onEnd = function (status) {
    ended = (status === 0) ? true : new Error(inflate.strm.msg)
  }

  return function (abort, cb) {
    if (ended) return cb(ended)
    read(abort, function next(end, data) {
      // Remember that the source finished before `end` is cleared below, so
      // the inflater can be told to finish.
      var sourceEnded = (end === true)
      if (sourceEnded) {
        end = null
        data = []
      }
      if (ended = end) return cb(end)
      if (data.length > 1) return cb(new Error('got more than one byte'))
      chunksOut = []
      inflate.push(data, sourceEnded)
      if (chunksOut.length)
        cb(null, chunksOut.length === 1 ? chunksOut[0] : Buffer.concat(chunksOut))
      else if (ended)
        cb(ended)
      else if (sourceEnded)
        // The source is exhausted and the inflater neither produced data nor
        // finished: stop instead of polling the ended source forever.
        cb(ended = new Error('stream ended early'))
      else
        // let the stack unwind
        setImmediate(function () {
          read(null, next)
        })
    })
  }
}
69 | |
/**
 * Through-stream that deflates (zlib-compresses) the data read from `read`.
 *
 * Output chunks and the final end/error marker produced by pako are queued
 * and handed out one per read.
 *
 * @param {function} read - pull-stream source of data to compress
 * @returns {function} pull-stream source yielding compressed Buffers
 */
function deflate(read) {
  var def = new pako.Deflate()
  var queue = []
  var ended

  def.onData = function (data) {
    queue.push([null, Buffer.from(data)])
  }

  def.onEnd = function (status) {
    queue.push([(status === 0) ? true : new Error(def.strm.msg)])
  }

  return function readOut(abort, cb) {
    if (ended) return cb(ended)

    if (abort) {
      // Propagate the abort upstream and terminate; do not hand out queued
      // or newly flushed data to a consumer that asked to stop.
      queue = []
      return read(abort, function (err) {
        cb(ended = err || abort)
      })
    }

    if (queue.length) {
      var item = queue.shift()
      // Latch the terminal state so later reads do not poll upstream again.
      if (item[0]) ended = item[0]
      return cb.apply(this, item)
    }

    read(null, function next(end, data) {
      if (end === true) def.push([], true)
      else if (end) return cb(ended = end)
      else def.push(data)
      readOut(null, cb)
    })
  }
}
97 | |
/**
 * Decode a packfile byte stream into a pull-stream source of objects.
 *
 * Each object handed downstream has the shape `{type, length, read}` where
 * `read` is a pull-stream source of the object's inflated contents. An
 * object's `read` must be drained before the next object is produced; a read
 * for the next object issued while one is still open is parked until it ends.
 *
 * When called without `read`, returns a partially applied function awaiting it.
 *
 * @param {object} opts - options; `verbosity` (number) and `onHeader(numObjects)`
 *   are the fields read here
 * @param {object} repo - must provide `getObject(hash, cb)`, used to resolve
 *   ref-delta bases
 * @param {function} [onEnd] - called with an Error, or null once the trailing
 *   checksum has been verified; the default rethrows errors
 * @param {function} [read] - pull-stream source of packfile bytes
 * @returns {function} pull-stream source of decoded objects
 */
function decodePack(opts, repo, onEnd, read) {
  if (read === undefined)
    return decodePack.bind(this, opts, repo, onEnd)
  onEnd = onEnd || function (err) {
    if (err) throw err
  }

  var ended
  var inObject = false
  // -1 means the header has not been read yet
  var numObjects = -1
  var checksum = createHash('sha1')
  var b = buffered(read)
  // TODO: optimize to pass through buffers to checksum
  var readByte = checksum(b.chunks(1))
  var readWord = checksum(b.chunks(4))
  var readHash = checksum(b.chunks(20))
  // the trailer is read without feeding it into the running checksum
  var readChecksum = b.chunks(20)
  var expectChecksum = true
  // callback of a read that arrived while an object was still being consumed
  var _cb

  function readHeader(cb) {
    readWord(null, function (end, header) {
      if (ended = end) return cb(end)
      // Buffer#equals takes a single argument; compare against the magic.
      if (!header.equals(Buffer.from('PACK')))
        read(new Error('Invalid packfile header'), error(cb))
      else
        readVersion(cb)
    })
  }

  function readVersion(cb) {
    readWord(null, function (end, word) {
      if (ended = end) return cb(end)
      var version = word.readUInt32BE(0)
      if (version < 2 || version > 3)
        read(new Error('Invalid packfile version ' + version), error(cb))
      else
        readNumObjects(cb)
    })
  }

  function readNumObjects(cb) {
    readWord(null, function (end, word) {
      if (ended = end) return cb(end)
      numObjects = word.readUInt32BE(0)
      if (opts.verbosity >= 2)
        console.error(numObjects + ' objects')
      if (opts.onHeader)
        opts.onHeader(numObjects)
      readObject(null, cb)
    })
  }

  // Read an object header: 3 type bits plus a variable-length size.
  function readTypedVarInt(cb) {
    var type, value, scale
    // https://codewords.recurse.com/images/three/varint.svg
    readByte(null, function (end, buf) {
      if (ended = end) return cb(end)
      var firstByte = buf[0]
      type = objectTypes[(firstByte >> 4) & 7]
      value = firstByte & 15
      // The first byte carries 4 size bits, so the next group is worth 2^4.
      // Multiplication is used instead of `<<` so sizes beyond 31 bits do
      // not wrap.
      scale = 16
      checkByte(firstByte)
    })

    function checkByte(byte) {
      if (byte & 0x80)
        readByte(null, gotByte)
      else
        cb(null, type, value)
    }

    function gotByte(end, buf) {
      if (ended = end) return cb(end)
      var byte = buf[0]
      value += (byte & 0x7f) * scale
      scale *= 128
      checkByte(byte)
    }
  }

  function getObject(cb) {
    inObject = true
    readTypedVarInt(function (end, type, length) {
      if (opts.verbosity >= 2)
        console.error('read object header', end, type, length)
      numObjects--
      if (end === true && expectChecksum)
        onEnd(new Error('Missing checksum'))
      if (ended = end) return cb(end)
      // TODO: verify that the inflated data is the correct length
      // NOTE(review): only 'ref-delta' is special-cased; 'ofs-delta' entries
      // take the plain-object path below - confirm whether that is intended.
      if (type == 'ref-delta')
        getObjectFromRefDelta(length, gotObject)
      else
        gotObject(null, {
          type: type,
          length: length,
          read: inflateBytes(readByte)
        })
    })

    function gotObject(err, obj) {
      // pass through the object but detect when it ends
      if (err) return cb(err)
      cb(null, {
        type: obj.type,
        length: obj.length,
        read: pull(
          obj.read,
          pull.through(null, function () {
            inObject = false
            if (_cb) {
              // Clear the parked callback before resuming so it cannot be
              // invoked a second time when a later object ends.
              var pending = _cb
              _cb = null
              readObject(null, pending)
            }
          })
        )
      })
    }
  }

  // TODO: test with ref-delta objects in pack
  function getObjectFromRefDelta(length, cb) {
    readHash(null, function (end, sourceHash) {
      if (end) return cb(end)
      sourceHash = sourceHash.toString('hex')
      var b = buffered(inflateBytes(readByte))
      var readInflatedByte = b.chunks(1)
      readVarInt(readInflatedByte, function (err, expectedSourceLength) {
        if (err) return cb(err)
        readVarInt(readInflatedByte, function (err, expectedTargetLength) {
          if (err) return cb(err)
          if (opts.verbosity >= 3)
            console.error('getting object', sourceHash)
          repo.getObject(sourceHash, function (err, sourceObject) {
            if (opts.verbosity >= 3)
              console.error('got object', sourceHash, sourceObject, err)
            if (err) return cb(err)
            if (sourceObject.length != expectedSourceLength)
              cb(new Error('Incorrect source object size in ref delta'))
            else
              patchObject(opts, b, length, sourceObject,
                expectedTargetLength, cb)
          })
        })
      })
    })
  }

  function readTrailer(cb) {
    // read the checksum before it updates to include the trailer
    var expected = checksum.digest()
    readChecksum(null, function (end, value) {
      // The object stream is over whatever the trailer turns out to be;
      // latch that so later reads do not try to read the trailer again.
      ended = true
      cb(true)
      if (end) {
        // No trailer bytes available: report once and do not touch `value`.
        return onEnd(end === true ? new Error('Missing checksum') : end)
      }
      if (!value.equals(expected)) {
        onEnd(new Error('Checksum mismatch: ' +
          expected.toString('hex') + ' != ' + value.toString('hex')))
      } else {
        if (opts.verbosity >= 3)
          console.error('checksum ok', expected.toString('hex'))
        onEnd(null)
      }
    })
  }

  function readObject(abort, cb) {
    if (ended) cb(ended)
    else if (inObject) _cb = cb
    else if (abort) read(abort, function (err) { cb(ended = err || abort) })
    else if (numObjects < 0) readHeader(cb)
    else if (numObjects > 0) getObject(cb)
    else if (expectChecksum) readTrailer(cb)
  }

  return readObject
}
278 | |
/**
 * Read a little-endian base-128 variable-length integer, one byte at a time.
 *
 * Each byte contributes its low 7 bits; a set high bit means another byte
 * follows.
 *
 * @param {function} readByte - pull-stream source yielding one-byte buffers
 * @param {function} cb - called as cb(end) on stream end/error, or
 *   cb(null, value) with the decoded number
 */
function readVarInt(readByte, cb) {
  var value = 0
  // Weight of the next 7-bit group. Multiplication is used instead of `<<`
  // so values beyond 31 bits do not wrap.
  var scale = 1
  readByte(null, function gotByte(end, buf) {
    // Do not assign to an outer `ended` here: none is declared in this
    // scope, so doing so would leak a global (or throw in strict mode).
    if (end) return cb(end)
    var byte = buf[0]
    value += (byte & 0x7f) * scale
    scale *= 128
    if (byte & 0x80)
      readByte(null, gotByte)
    else
      cb(null, value)
  })
}
292 | |
/**
 * Apply a delta to a source object, producing the patched target object.
 *
 * The source object's contents are collected into memory first; `cb` then
 * receives `{type, length, read}` where `read` is a pull-stream source that
 * interprets the remaining delta instructions from `deltaB`.
 *
 * @param {object} opts - options; only `verbosity` is read here
 * @param {object} deltaB - buffered reader over the inflated delta data,
 *   providing `chunks(n)`
 * @param {number} deltaLength - not used by this function
 * @param {object} srcObject - base object `{type, length, read}`
 * @param {number} targetLength - length reported for the resulting object
 * @param {function} cb - called as cb(err) or cb(null, object)
 */
function patchObject(opts, deltaB, deltaLength, srcObject, targetLength, cb) {
  var readByte = deltaB.chunks(1)
  var srcBuf
  var ended

  if (opts.verbosity >= 2)
    console.error('patching', srcObject.type, targetLength)
  pull(
    srcObject.read,
    pull.collect(function (err, bufs) {
      // Without the complete source contents the delta cannot be applied.
      if (err) return cb(err)
      srcBuf = Buffer.concat(bufs, srcObject.length)
      cb(null, {
        type: srcObject.type,
        length: targetLength,
        read: read
      })
    })
  )

  function read(abort, cb) {
    if (ended) return cb(ended)
    readByte(null, function (end, dBuf) {
      if (ended = end) return cb(end)
      var cmd = dBuf[0]
      if (cmd & 0x80)
        // skip a variable amount and then pass through a variable amount
        readOffsetSize(cmd, deltaB, function (err, offset, size) {
          if (err) return earlyEnd(err)
          // Buffer#slice silently clamps (and treats negative offsets as
          // relative to the end), which would yield a corrupt object.
          if (offset < 0 || offset + size > srcBuf.length)
            return cb(ended = new Error('delta copy is out of source bounds'))
          cb(null, srcBuf.slice(offset, offset + size))
        })
      else if (cmd)
        // insert `cmd` bytes from delta
        deltaB.chunks(cmd)(null, cb)
      else
        cb(ended = new Error("unexpected delta opcode 0"))
    })

    function earlyEnd(err) {
      cb(ended = (err === true ? new Error('stream ended early') : err))
    }
  }
}
336 | |
/**
 * Read the offset and size operands of a delta "copy" instruction.
 *
 * Bits 0-3 of `cmd` say which of the four offset bytes are present and bits
 * 4-6 say which of the three size bytes are present; present bytes follow in
 * the stream in that order, least significant first. A size of zero is
 * reported as 0x10000.
 *
 * @param {number} cmd - the copy instruction byte
 * @param {object} b - buffered reader providing `chunks(1)`
 * @param {function} readCb - called as readCb(err) if the stream ends or
 *   fails, otherwise readCb(null, offset, size)
 */
function readOffsetSize(cmd, b, readCb) {
  var readByte = b.chunks(1)
  var offset = 0
  var size = 0

  function step(bit) {
    if (bit > 6) {
      // `>>> 0` reinterprets the 32-bit result as unsigned: a top offset
      // byte >= 0x80 would otherwise make the offset negative.
      return readCb(null, offset >>> 0, size || 0x10000)
    }
    if (!(cmd & (1 << bit))) return step(bit + 1)
    readByte(null, function (err, buf) {
      if (err) return readCb(err)
      if (bit < 4) offset |= buf[0] << (bit << 3)
      else size |= buf[0] << ((bit - 4) << 3)
      step(bit + 1)
    })
  }

  step(0)
}
377 | |
/**
 * Encode an object header: the object type and its length as a
 * variable-length integer.
 *
 * The first byte holds the 3-bit type code and the low 4 bits of the length;
 * each following byte holds 7 more length bits. Every byte except the last
 * has its high bit set.
 *
 * @param {string} typeStr - object type name (a key of objectTypeNums)
 * @param {number} length - object length
 * @param {function} cb - called as cb(err) for an unknown type or negative
 *   length, otherwise cb(null, buffer)
 */
function encodeTypedVarInt(typeStr, length, cb) {
  var type = objectTypeNums[typeStr]
  if (!type)
    return cb(new Error("Bad object type " + typeStr))
  if (length < 0)
    return cb(new Error("Bad object length " + length))

  // Arithmetic is used instead of `>>`: shifts operate on signed 32-bit
  // integers, so a length of 2^31 or more would turn negative and the
  // original shift loop would never reach zero.
  var vals = []
  var b = (type << 4) | (length % 16)
  var rest = Math.floor(length / 16)
  while (rest > 0) {
    vals.push(b | 0x80)
    b = rest % 128
    rest = Math.floor(rest / 128)
  }
  vals.push(b)
  cb(null, Buffer.from(vals))
}
398 | |
/**
 * Encode a stream of objects as a packfile byte stream.
 *
 * Output is the 12-byte header ('PACK', version, object count), then for each
 * object its typed-varint header followed by its deflated contents, and
 * finally the digest supplied by `checksum.readDigest`.
 *
 * May be called as encodePack(numObjects) or encodePack(opts, numObjects);
 * when `readObject` is omitted a partially applied function awaiting it is
 * returned.
 *
 * @param {object} [opts] - options (not read by this function)
 * @param {number} numObjects - object count written into the header
 * @param {function} [readObject] - pull-stream source of `{type, length, read}`
 * @returns {function} pull-stream source of packfile Buffers
 */
function encodePack(opts, numObjects, readObject) {
  if (numObjects === undefined) {
    numObjects = opts
    opts = null
  }
  if (readObject === undefined)
    return encodePack.bind(this, opts, numObjects)

  // Buffer.alloc returns zero-filled memory, unlike the deprecated
  // `new Buffer(size)`.
  var header = Buffer.alloc(12)
  header.write('PACK')
  header.writeUInt32BE(PACK_VERSION, 4)
  header.writeUInt32BE(numObjects, 8)
  var checksum = createHash('sha1')
  // deflated contents of the object currently being written, if any
  var readData

  return cat([
    checksum(cat([
      pull.once(header),
      encodeObject
    ])),
    checksum.readDigest
  ])

  function encodeObject(abort, cb) {
    if (readData)
      readData(abort, function (end, data) {
        // current object finished cleanly: move on to the next one
        if (end === true)
          readObject(abort, nextObject)
        else
          cb(end, data)
      })
    else
      readObject(abort, nextObject)

    function nextObject(end, object) {
      if (end) return cb(end)
      readData = deflate(object.read)
      // emit the object header now; its contents follow on later reads
      encodeTypedVarInt(object.type, object.length, cb)
    }
  }
}
438 |
Built with git-ssb-web