Files: d96c42c6804052c6ea48b6833b6d8b110d3baad8 / lib / git.js
18244 bytesRaw
1 | var pull = require('pull-stream') |
2 | var paramap = require('pull-paramap') |
3 | var lru = require('hashlru') |
4 | var memo = require('asyncmemo') |
5 | var u = require('./util') |
6 | var packidx = require('pull-git-packidx-parser') |
7 | var Reader = require('pull-reader') |
8 | var toPull = require('stream-to-pull-stream') |
9 | var zlib = require('zlib') |
10 | var looper = require('looper') |
11 | var multicb = require('multicb') |
12 | var kvdiff = require('pull-kvdiff') |
13 | |
// Error type handed to callbacks when a git object cannot be located
var ObjectNotFoundError = u.customError('ObjectNotFoundError')

// Values accepted for the `type` header when parsing a tag (see getTag)
var types = {
  blob: true,
  commit: true,
  tree: true,
}

// Id of the empty blob, which _findObject treats as a special case
var emptyBlobHash = 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'

module.exports = Git

// Git object reader bound to an `app`, which supplies message and blob access
// (getMsgDecrypted, ensureHasBlobs, readBlob, readBlobSlice).
//
// The constructor installs memoized wrappers around the lookup methods, each
// backed by its own small LRU cache.
function Git(app) {
  var git = this
  git.app = app

  // wrap a prototype method with asyncmemo, keyed by `keyOf(arg)`
  function cached(size, keyOf, method) {
    return memo({cache: lru(size), asString: keyOf}, method.bind(git))
  }

  git.findObject = cached(5, function (opts) {
    return opts.obj + opts.headMsgId
  }, git._findObject)

  git.findObjectInMsg = cached(5, function (opts) {
    return opts.obj + opts.msg
  }, git._findObjectInMsg)

  git.getPackIndex = cached(4, JSON.stringify, git._getPackIndex)
}
47 | |
// Open an object, read it to the end and call back with everything buffered
// by u.pullConcat.
//
// opts: passed through to openObject
// cb(err, data)
Git.prototype.getObject = function (opts, cb) {
  var git = this
  git.openObject(opts, function onOpened(err, obj) {
    if (err) return cb(err)
    var content = git.readObject(obj)
    pull(content, u.pullConcat(cb))
  })
}
59 | |
// Look up an object with findObject and call back with only the message
// recorded in the resulting location.
//
// opts: passed through to findObject
// cb(err, msg)
Git.prototype.getObjectMsg = function (opts, cb) {
  this.findObject(opts, function onFound(err, location) {
    if (err) {
      cb(err)
      return
    }
    cb(null, location.msg)
  })
}
67 | |
// Locate an object inside a specific message, make sure the pack blob that
// holds it is available, and call back with a descriptor that readObject
// accepts.
//
// opts: passed to findObjectInMsg; opts.type and opts.length are copied into
//   the descriptor
// cb(err, obj)
Git.prototype.openObject = function (opts, cb) {
  var git = this

  function describe(loc) {
    return {
      type: opts.type,
      length: opts.length,
      offset: loc.offset,
      next: loc.next,
      packLink: loc.packLink,
      idx: loc.idx,
      msg: loc.msg,
    }
  }

  git.findObjectInMsg(opts, function onLocated(err, loc) {
    if (err) return cb(err)
    git.app.ensureHasBlobs([loc.packLink], function onBlobs(err) {
      if (err) return cb(err)
      cb(null, describe(loc))
    })
  })
}
86 | |
// Return a pull-stream source of an object's decoded content.
//
// obj: descriptor with offset, next, packLink, idx, type and length
// A descriptor whose offset equals its next yields an empty stream.
Git.prototype.readObject = function (obj) {
  var isEmpty = obj.offset === obj.next
  if (isEmpty) return pull.empty()

  // the slice of the pack blob between offset and next
  var packedBytes = this.app.readBlobSlice(obj.packLink, {
    start: obj.offset,
    end: obj.next
  })
  var decode = this.decodeObject({
    type: obj.type,
    length: obj.length,
    packLink: obj.packLink,
    idx: obj.idx,
  })
  return pull(packedBytes, decode)
}
99 | |
// Find which packfile contains a git object, and where in the packfile it is
// located.
//
// opts.headMsgId: id of the message to start searching from (required)
// opts.obj: id of the git object (required)
// cb(err, loc): loc has offset, next, packLink, idx and msg. err is a
//   TypeError for missing options, or an ObjectNotFoundError when no
//   candidate message is found.
Git.prototype._findObject = function (opts, cb) {
  if (!opts.headMsgId) return cb(new TypeError('missing head message id'))
  if (!opts.obj) return cb(new TypeError('missing object id'))
  var self = this
  var objId = opts.obj
  if (objId === emptyBlobHash) {
    // special case: the empty blob may be found anywhere, so it is attributed
    // to the head message with an empty location. Return here so the regular
    // search below does not also run and call cb a second time.
    return self.app.getMsgDecrypted(opts.headMsgId, function (err, msg) {
      if (err) return cb(err)
      return cb(null, {
        offset: 0,
        next: 0,
        packLink: null,
        idx: null,
        msg: msg,
      })
    })
  }
  self.findObjectMsgs(opts, function (err, msgs) {
    if (err) return cb(err)
    if (msgs.length === 0)
      return cb(new ObjectNotFoundError('unable to find git object ' + objId))
    self.findObjectInMsgs(objId, msgs, cb)
  })
}
127 | |
// Find an object's location within the packs of one specific message.
//
// opts.msg: id of the message to look in (required)
// opts.obj: id of the git object (required)
// cb(err, loc): same result as findObjectInMsgs
Git.prototype._findObjectInMsg = function (opts, cb) {
  var msgId = opts.msg
  if (!msgId) return cb(new TypeError('missing message id'))
  if (!opts.obj) return cb(new TypeError('missing object id'))

  var git = this
  git.app.getMsgDecrypted(msgId, function onMsg(err, msg) {
    if (err) {
      cb(err)
      return
    }
    git.findObjectInMsgs(opts.obj, [msg], cb)
  })
}
137 | |
// Search the packs announced by a set of messages for a git object.
//
// objId: hex id of the git object
// msgs: messages whose content may list `packs` and `indexes`; a pack is only
//   considered when the index at the same position is also present
// cb(err, loc): loc is {offset, next, packLink, idx, msg} for the first
//   matching pack the stream yields; err is an ObjectNotFoundError when no
//   index contains the object
Git.prototype.findObjectInMsgs = function (objId, msgs, cb) {
  var self = this
  // Buffer.from replaces the deprecated `new Buffer(string, encoding)` form
  var objIdBuf = Buffer.from(objId, 'hex')
  // if blobs may need to be fetched, try to ask the user about as many of them
  // at one time as possible
  var packidxs = [].concat.apply([], msgs.map(function (msg) {
    var c = msg.value.content
    var idxs = u.toArray(c.indexes).map(u.toLink)
    return u.toArray(c.packs).map(u.toLink).map(function (pack, i) {
      var idx = idxs[i]
      if (pack && idx) return {
        msg: msg,
        packLink: pack,
        idxLink: idx,
      }
    })
  })).filter(Boolean)
  // with a single candidate the pack will certainly be needed, so request it
  // together with its index; otherwise request only the indexes
  var blobLinks = packidxs.length === 1
    ? [packidxs[0].idxLink, packidxs[0].packLink]
    : packidxs.map(function (packidx) {
      return packidx.idxLink
    })
  self.app.ensureHasBlobs(blobLinks, function (err) {
    if (err) return cb(err)
    pull(
      pull.values(packidxs),
      paramap(function (pack, cb) {
        self.getPackIndex(pack.idxLink, function (err, idx) {
          if (err) return cb(err)
          var offset = idx.find(objIdBuf)
          // no entry in this index: emit nothing, dropped by the filter below
          if (!offset) return cb()
          cb(null, {
            offset: offset.offset,
            next: offset.next,
            packLink: pack.packLink,
            idx: idx,
            msg: pack.msg,
          })
        })
      }, 4),
      pull.filter(),
      pull.take(1),
      pull.collect(function (err, offsets) {
        if (err) return cb(err)
        if (offsets.length === 0)
          return cb(new ObjectNotFoundError('unable to find git object '
            + objId + ' in ' + msgs.length + ' messages'))
        cb(null, offsets[0])
      })
    )
  })
}
190 | |
// Given an object id and ssb msg id, get a set of messages of which at least
// one pushed the object.
//
// opts.obj: id of the git object
// opts.headMsgId: id of the message to start from; must satisfy u.isRef
// cb(err, msgs): called once. msgs is a single-element array when a message
//   lists the object in object_ids, tags or commits; otherwise it holds every
//   visited message that has no object_ids list.
Git.prototype.findObjectMsgs = function (opts, cb) {
  var self = this
  var objId = opts.obj
  var headMsgId = opts.headMsgId
  var ended = false
  var waiting = 0
  var maybeMsgs = []

  function cbOnce(err, msgs) {
    if (ended) return
    ended = true
    cb(err, msgs)
  }

  // entries are either plain ids or objects carrying a sha1 property
  function objectMatches(entry) {
    return entry && (entry === objId || entry.sha1 === objId)
  }

  if (!headMsgId) return cb(new TypeError('missing head message id'))
  if (!u.isRef(headMsgId))
    return cb(new TypeError('bad head message id \'' + headMsgId + '\''))

  // `waiting` counts lookups in flight; the search is over when it drops back
  // to zero without a definite match
  ;(function getMsg(msgId) {
    waiting++
    self.app.getMsgDecrypted(msgId, function (err, msg) {
      waiting--
      if (ended) return
      // name the message that is actually missing, which may be an ancestor
      // of the head rather than the head itself
      if (err && err.name === 'NotFoundError')
        return cbOnce(new Error('missing message ' + msgId))
      if (err) return cbOnce(err)
      var c = msg.value.content
      if (typeof c === 'string')
        return cbOnce(new Error('unable to decrypt message ' + msg.key))
      if ((u.toArray(c.object_ids).some(objectMatches))
      || (u.toArray(c.tags).some(objectMatches))
      || (u.toArray(c.commits).some(objectMatches))) {
        // found the object
        return cbOnce(null, [msg])
      } else if (!c.object_ids) {
        // the object might be here
        maybeMsgs.push(msg)
      }
      // traverse the DAG to keep looking for the object
      u.toArray(c.repoBranch).filter(u.isRef).forEach(getMsg)
      if (waiting === 0) {
        cbOnce(null, maybeMsgs)
      }
    })
  })(headMsgId)
}
242 | |
// Read a pack index blob and parse it with pull-git-packidx-parser.
//
// idxBlobLink: link to the index blob, passed to app.readBlob
// cb: handed to the parser, which reports the result through it
Git.prototype._getPackIndex = function (idxBlobLink, cb) {
  var idxBytes = this.app.readBlob(idxBlobLink)
  var parseIndex = packidx(cb)
  pull(idxBytes, parseIndex)
}
246 | |
// Names for the 3-bit type field of a packed object header, indexed by value
var objectTypes = [
  'none', 'commit', 'tree', 'blob',
  'tag', 'unused', 'ofs-delta', 'ref-delta'
]

// Read an object header: one byte holding a continuation flag (0x80), a 3-bit
// type and the low 4 bits of the value, followed by further bytes that each
// contribute 7 more bits (least significant first) while the flag is set.
//
// reader: object with read(n, cb(end, buf))
// cb(end) if the reader ends or errors, else cb(null, type, value)
function readTypedVarInt(reader, cb) {
  var type, value, shift
  reader.read(1, function (end, buf) {
    if (end) return cb(end)
    var firstByte = buf[0]
    type = objectTypes[(firstByte >> 4) & 7]
    value = firstByte & 15
    shift = 4
    checkByte(firstByte)
  })

  function checkByte(byte) {
    if (byte & 0x80)
      reader.read(1, gotByte)
    else
      cb(null, type, value)
  }

  function gotByte(end, buf) {
    if (end) return cb(end)
    var byte = buf[0]
    // multiply instead of `<<`, which wraps at 32 bits and would corrupt
    // values of 2^31 and above
    value += (byte & 0x7f) * Math.pow(2, shift)
    shift += 7
    checkByte(byte)
  }
}
278 | |
// Read a little-endian base-128 integer: each byte contributes its low 7 bits
// and a set high bit (0x80) means another byte follows.
//
// reader: object with read(n, cb(end, buf))
// cb(end) if the reader ends or errors, else cb(null, value)
function readVarInt(reader, cb) {
  var value = 0, shift = 0
  reader.read(1, function gotByte(end, buf) {
    if (end) return cb(end)
    var byte = buf[0]
    // multiply instead of `<<`, which wraps at 32 bits and would corrupt
    // values of 2^31 and above
    value += (byte & 0x7f) * Math.pow(2, shift)
    shift += 7
    if (byte & 0x80)
      reader.read(1, gotByte)
    else
      cb(null, value)
  })
}
292 | |
// Pull-stream through that passes the data from `read` through a fresh zlib
// inflate stream.
function inflate(read) {
  var inflateThrough = toPull(zlib.createInflate())
  return inflateThrough(read)
}
296 | |
// Create a pull-stream through that decodes one packed object: it reads the
// typed header, then either inflates the content directly or, for a
// 'ref-delta' object, reads the source object and applies the delta.
//
// opts.type: expected object type; checked when set
// opts.length: expected inflated length; checked when set
// opts.packLink: pack blob to read delta source objects from
// opts.idx: pack index, used to find delta source objects by hash
Git.prototype.decodeObject = function (opts) {
  var self = this
  return function (read) {
    var reader = Reader()
    reader(read)
    return u.readNext(function (cb) {
      readTypedVarInt(reader, function (end, type, length) {
        if (end === true) cb(new Error('Missing object type'))
        else if (end) cb(end)
        else if (type === 'ref-delta') getObjectFromRefDelta(length, cb)
        else if (opts.type && type !== opts.type)
          cb(new Error('expected type \'' + opts.type + '\' ' +
            'but found \'' + type + '\''))
        else if (opts.length && length !== opts.length)
          cb(new Error('expected length ' + opts.length + ' ' +
            'but found ' + length))
        else cb(null, inflate(reader.read()))
      })
    })

    // A ref-delta is the 20-byte hash of the source object followed by
    // deflated delta data, which starts with the source and target lengths.
    function getObjectFromRefDelta(length, cb) {
      reader.read(20, function (end, sourceHash) {
        if (end) return cb(end)
        var inflatedReader = Reader()
        pull(reader.read(), inflate, inflatedReader)
        readVarInt(inflatedReader, function (err, expectedSourceLength) {
          if (err) return cb(err)
          readVarInt(inflatedReader, function (err, expectedTargetLength) {
            if (err) return cb(err)
            var offset = opts.idx.find(sourceHash)
            // report a missing source as an error; it must not be passed on
            // as if it were the resulting stream
            if (!offset) return cb(new Error('missing source object ' +
              sourceHash.toString('hex')))
            var readSource = pull(
              self.app.readBlobSlice(opts.packLink, {
                start: offset.offset,
                end: offset.next
              }),
              self.decodeObject({
                type: opts.type,
                length: expectedSourceLength,
                packLink: opts.packLink,
                idx: opts.idx
              })
            )
            cb(null, patchObject(inflatedReader, length, readSource, expectedTargetLength))
          })
        })
      })
    }
  }
}
349 | |
// Read the operands of a delta copy instruction. Bits 0-3 of `cmd` say which
// of the four offset bytes are present in the stream and bits 4-6 which of
// the three size bytes are; absent bytes count as zero. Bytes are ordered
// least significant first.
//
// cmd: the instruction byte
// reader: object with read(n, cb(err, buf))
// readCb(err) if a read fails, else readCb(null, offset, size), where a size
//   of zero is reported as 0x10000
function readOffsetSize(cmd, reader, readCb) {
  // Assemble `byteCount` optional bytes whose presence flags start at bit
  // `firstBit` of cmd, then pass the resulting number to `done`.
  function readField(firstBit, byteCount, done) {
    var value = 0
    ;(function next(i) {
      if (i === byteCount) return done(value)
      if (!(cmd & (1 << (firstBit + i)))) return next(i + 1)
      reader.read(1, function (err, buf) {
        if (err) return readCb(err)
        // multiply instead of `<<` so that a top offset byte of 0x80 or more
        // does not turn the result negative
        value += buf[0] * Math.pow(2, 8 * i)
        next(i + 1)
      })
    })(0)
  }

  readField(0, 4, function (offset) {
    readField(4, 3, function (size) {
      readCb(null, offset, size || 0x10000)
    })
  })
}
389 | |
// Apply a delta to a source object, returning a pull-stream source of the
// patched content.
//
// deltaReader: reader positioned at the first delta instruction
// deltaLength: not used by this function
// readSource: pull-stream source of the object the delta applies to; it is
//   buffered completely before any instruction is read
// targetLength: not used by this function
function patchObject(deltaReader, deltaLength, readSource, targetLength) {
  var srcBuf
  var ended

  return u.readNext(function (cb) {
    pull(readSource, u.pullConcat(function (err, buf) {
      if (err) return cb(err)
      srcBuf = buf
      cb(null, read)
    }))
  })

  function read(abort, cb) {
    if (ended) return cb(ended)
    if (abort) {
      // pass the abort on to the delta stream so it can be released, as
      // readTree does for its reader
      ended = abort
      return deltaReader.abort(abort, function () {
        cb(abort)
      })
    }
    deltaReader.read(1, function (end, dBuf) {
      if (ended = end) return cb(end)
      var cmd = dBuf[0]
      if (cmd & 0x80)
        // skip a variable amount and then pass through a variable amount
        readOffsetSize(cmd, deltaReader, function (err, offset, size) {
          if (err) return earlyEnd(err)
          var buf = srcBuf.slice(offset, offset + size)
          cb(end, buf)
        })
      else if (cmd)
        // insert `cmd` bytes from delta
        deltaReader.read(cmd, cb)
      else
        cb(new Error("unexpected delta opcode 0"))
    })

    // a plain end-of-stream in the middle of an instruction is an error
    function earlyEnd(err) {
      cb(err === true ? new Error('stream ended early') : err)
    }
  }
}
426 | |
// Matches `<name> <<email>> <date> <tz>`; the email is additionally split into
// the part before the first '@' and the part from the '@' onwards
var gitNameRegex = /^(.*) <(([^>@]*)(@[^>]*)?)> (.*) (.*)$/

// Parse the value of an author/committer/tagger header.
//
// Returns null when the line does not match, otherwise an object with name,
// email, localpart, feed, date and tz. `feed` is the '@...' part of the email
// when u.isRef accepts it, else undefined. `date` is built from the numeric
// field multiplied by 1000.
function parseName(line) {
  var match = gitNameRegex.exec(line)
  if (match === null) return null

  var atPart = match[4]
  var feed = u.isRef(atPart) ? (atPart || undefined) : undefined
  return {
    name: match[1],
    email: match[2],
    localpart: match[3],
    feed: feed,
    date: new Date(match[5] * 1000),
    tz: match[6],
  }
}
440 | |
// Read and parse a commit object.
//
// obj: descriptor accepted by readObject; obj.msg is copied to the result
// cb(err, commit): commit has msg, parents, committer and body, plus tree,
//   gpgsig and author when present. author is only set when its header
//   differs from the committer header. An unrecognised header yields a
//   TypeError.
Git.prototype.getCommit = function (obj, cb) {
  var content = this.readObject(obj)
  pull(content, u.pullConcat(function (err, buf) {
    if (err) return cb(err)
    var commit = {
      msg: obj.msg,
      parents: [],
    }
    var authorText, committerText
    var lines = buf.toString('utf8').split('\n')
    var header
    // headers run up to the first empty line; whatever remains is the body
    while ((header = lines.shift())) {
      var sep = header.indexOf(' ')
      var prop = sep < 0 ? header : header.slice(0, sep)
      var value = sep < 0 ? '' : header.slice(sep + 1)
      if (prop === 'tree') {
        commit.tree = value
      } else if (prop === 'parent') {
        commit.parents.push(value)
      } else if (prop === 'author') {
        authorText = value
      } else if (prop === 'committer') {
        committerText = value
      } else if (prop === 'gpgsig') {
        // the signature continues on following lines that start with a space
        var sigLines = [value]
        while (lines[0] && lines[0][0] == ' ')
          sigLines.push(lines.shift().slice(1))
        commit.gpgsig = sigLines.join('\n')
      } else {
        return cb(new TypeError('unknown git object property ' + prop))
      }
    }
    commit.committer = parseName(committerText)
    if (authorText !== committerText) commit.author = parseName(authorText)
    commit.body = lines.join('\n')
    cb(null, commit)
  }))
}
483 | |
// Read and parse a tag object.
//
// obj: descriptor accepted by readObject; obj.msg is copied to the result
// cb(err, tag): tag has msg and body, plus object, type, tag and tagger when
//   the corresponding headers are present. An unrecognised header, or a type
//   not listed in `types`, yields a TypeError.
Git.prototype.getTag = function (obj, cb) {
  pull(this.readObject(obj), u.pullConcat(function (err, buf) {
    if (err) return cb(err)
    var tag = {
      msg: obj.msg,
    }
    var lines = buf.toString('utf8').split('\n')
    // headers run up to the first empty line; whatever remains is the body
    for (var line; (line = lines.shift()); ) {
      var parts = line.split(' ')
      var prop = parts.shift()
      var value = parts.join(' ')
      switch (prop) {
        case 'object':
          tag.object = value
          break
        case 'type':
          // report the offending value; the name `type` is not defined here
          if (!types[value])
            return cb(new TypeError('unknown git object type ' + value))
          tag.type = value
          break
        case 'tag':
          tag.tag = value
          break
        case 'tagger':
          tag.tagger = parseName(value)
          break
        default:
          return cb(new TypeError('unknown git object property ' + prop))
      }
    }
    tag.body = lines.join('\n')
    cb(null, tag)
  }))
}
519 | |
// Read single bytes from `reader` until a zero byte, then call back with the
// bytes before it decoded as a UTF-8 string.
//
// reader: object with read(n, cb(err, buf))
// cb(err) if a read fails or the stream ends, else cb(null, string)
function readCString(reader, cb) {
  var pieces = []
  var readMore = looper(function () {
    reader.read(1, onByte)
  })
  readMore()

  function onByte(err, piece) {
    if (err) return cb(err)
    var isTerminator = piece[0] === 0
    if (isTerminator) return cb(null, Buffer.concat(pieces).toString('utf8'))
    pieces.push(piece)
    readMore()
  }
}
533 | |
// Return a pull-stream source of the entries of a tree object.
//
// obj: descriptor accepted by readObject
// Each entry is read as a zero-terminated '<octal mode> <name>' string
// followed by a 20-byte hash, and is emitted as {name, mode, hash, type},
// where hash is hex and type is derived from the mode.
Git.prototype.readTree = function (obj) {
  // Written in hex because legacy octal literals (0040000, 0160000) are a
  // syntax error in strict mode and in ES modules.
  var MODE_TREE = 0x4000 // 040000 in octal
  var MODE_COMMIT = 0xe000 // 0160000 in octal

  var reader = Reader()
  reader(this.readObject(obj))
  return function (abort, cb) {
    if (abort) return reader.abort(abort, cb)
    readCString(reader, function (err, str) {
      if (err) return cb(err)
      var parts = str.split(' ')
      var mode = parseInt(parts[0], 8)
      // names may themselves contain spaces
      var name = parts.slice(1).join(' ')
      reader.read(20, function (err, hash) {
        if (err) return cb(err)
        cb(null, {
          name: name,
          mode: mode,
          hash: hash.toString('hex'),
          type: mode === MODE_TREE ? 'tree' :
            mode === MODE_COMMIT ? 'commit' : 'blob',
        })
      })
    })
  }
}
558 | |
// Return a pull-stream source of the differences between a commit's tree and
// the trees of its parents.
//
// commit: parsed commit as produced by getCommit (uses parents, tree and
//   msg.key)
// The parent commits are loaded first; then the tree of every parent and of
// the commit itself is opened, with the commit's own tree last, and the list
// is handed to diffTreesRecursive.
Git.prototype.readCommitChanges = function (commit) {
  var self = this
  return u.readNext(function (cb) {
    var commitsDone = multicb({pluck: 1})
    commit.parents.forEach(function (rev) {
      var parentCb = commitsDone()
      self.getObjectMsg({
        obj: rev,
        headMsgId: commit.msg.key,
        type: 'commit',
      }, function (err, msg) {
        if (err) return parentCb(err)
        self.openObject({
          obj: rev,
          msg: msg.key,
        }, function (err, obj) {
          if (err) return parentCb(err)
          self.getCommit(obj, parentCb)
        })
      })
    })
    // the commit itself goes after its parents
    commitsDone()(null, commit)
    commitsDone(function (err, commits) {
      if (err) return cb(err)
      var treesDone = multicb({pluck: 1})
      commits.forEach(function (c) {
        var treeCb = treesDone()
        if (!c.tree) return treeCb(null, pull.empty())
        self.getObjectMsg({
          obj: c.tree,
          headMsgId: c.msg.key,
          type: 'tree',
        }, function (err, msg) {
          if (err) return treeCb(err)
          // open the tree in the message that getObjectMsg located, which is
          // not necessarily the commit's own message
          self.openObject({
            obj: c.tree,
            msg: msg.key,
          }, treeCb)
        })
      })
      treesDone(function (err, trees) {
        if (err) return cb(err)
        cb(null, self.diffTreesRecursive(trees))
      })
    })
  })
}
606 | |
// Compare the entries of several trees by name (one level only).
//
// objs: tree descriptors accepted by readTree; the last one is the head
// Returns a pull-stream source of {name, hash, mode, type, deleted, created},
// where hash and type are arrays taken from each tree's entry.
Git.prototype.diffTrees = function (objs) {
  var git = this
  var treeSources = objs.map(function (obj) {
    return git.readTree(obj)
  })

  function summarize(item) {
    var entries = item.values
    var diff = item.diff || {}
    var head = entries[entries.length - 1]
    // created means no tree before the head has an entry under this name
    var existedBefore = entries.slice(0, -1).some(Boolean)
    return {
      name: item.key,
      hash: entries.map(function (entry) { return entry.hash }),
      mode: diff.mode,
      type: entries.map(function (entry) { return entry.type }),
      deleted: !head,
      created: !existedBefore
    }
  }

  return pull(kvdiff(treeSources, 'name'), pull.map(summarize))
}
630 | |
// Like diffTrees, but descends into entries that are a tree on at least one
// side and emits the nested differences with names prefixed by the parent
// entry's name and '/'.
//
// objs: tree descriptors; objs[i].msg.key is the starting message when
//   looking up subtree i
Git.prototype.diffTreesRecursive = function (objs) {
  var git = this

  function isTree(t) {
    return t === 'tree'
  }

  // turn one diff item into either [item] or a stream of nested items
  function expand(item, cb) {
    if (!item.type.some(isTree)) return cb(null, [item])

    var opened = multicb({pluck: 1})
    item.type.forEach(function (type, i) {
      var slot = opened()
      // positions that are not trees get a placeholder instead of a descriptor
      if (!isTree(type)) return slot(null, pull.once(item))
      var hash = item.hash[i]
      git.getObjectMsg({
        obj: hash,
        headMsgId: objs[i].msg.key,
      }, function (err, msg) {
        if (err) return slot(err)
        git.openObject({
          obj: hash,
          msg: msg.key,
        }, slot)
      })
    })
    opened(function (err, subtrees) {
      if (err) return cb(err)
      var nested = pull(
        git.diffTreesRecursive(subtrees),
        pull.map(function (f) {
          f.name = item.name + '/' + f.name
          return f
        })
      )
      cb(null, nested)
    })
  }

  return pull(
    git.diffTrees(objs),
    paramap(expand, 4),
    pull.flatten()
  )
}
668 |
Built with git-ssb-web