git ssb

0+

cel / pull-git-pack-concat



Commit 611c32d6a1f261d8ef44138fceb9263a784d8856

Use pack-indexes to deduplicate packs

Charles Lehner committed on 10/15/2016, 6:07:35 PM
Parent: b2606aaae852e50f39ef35d8630a33ac2a744e71

Files changed

index.js changed
package.json changed
index.js View
@@ -4,8 +4,10 @@
44 var buffered = require('pull-buffered')
55 var multicb = require('multicb')
66 var crypto = require('crypto')
77 var skipFooter = require('pull-skip-footer')
8 +var packidx = require('pull-git-packidx-parser')
9 +var blockFilter = require('pull-block-filter')
810
911 function packHeader(numObjects) {
1012 var header = new Buffer(12)
1113 header.write('PACK')
@@ -24,92 +26,58 @@
2426 })
2527 })
2628 }
2729
28-function reduceAsync(arr, fn, init, cb) {
29- var i = 0
30- var acc = init
31- loop(function (next) {
32- if (i >= arr.length) return cb(null, acc)
33- fn(arr[i++], acc, function (err, data) {
34- if (err) return cb(err)
35- acc = data
36- next()
37- })
38- })
30 +function compareByOffset(a, b) {
31 + return a.offset - b.offset
3932 }
4033
41-function skipHeader(len) {
42- return function (read) {
43- return function (end, cb) {
44- if (end || len <= 0) read(end, cb)
45- else read(null, function next(end, data) {
46- if (end) return cb(end)
47- var _len = len
48- len -= data.length
49- if (len > 0) read(null, next)
50- else cb(null, data.slice(_len))
51- })
52- }
53- }
54-}
55-
56-function readHeader(read, len, cb) {
57- var headerBufs = []
58- var dataBuf
59- read(null, function next(end, data) {
60- if (end) return cb(end === true ? new Error('Missing header') : err)
61- if (data.length > len) {
62- // got more than enough for header
63- headerBufs.push(data.slice(0, len))
64- var header = Buffer.concat(headerBufs)
65- headerBufs = null
66- dataBuf = data.slice(len)
67- cb(null, header, readRest)
68- } else if (data.length === len) {
69- // got enough for header
70- headerBufs.push(data)
71- var header = Buffer.concat(headerBufs)
72- headerBufs = null
73- cb(null, header, read)
74- } else {
75- len -= data.length
76- headerBufs.push(data)
77- read(null, next)
78- }
34 +function dedupPacks(packs, cb) {
35 + var seen = {}
36 + var numObjects = 0
37 + forEachAsync(packs, function (pack, cb) {
38 + return pull(pack.readIdx, packidx(function (err, idx) {
39 + if (err) return cb(err)
40 + var blocks = []
41 + var lastBlock
42 + offset = 0
43 + var objs = idx.objects.sort(compareByOffset)
44 + for (var i = 0; i < objs.length; i++) {
45 + var obj = objs[i]
46 + var id = obj.oid.toString('hex')
47 + if (seen[id]) continue
48 + seen[id] = true
49 + numObjects++
50 + if (obj.offset > offset) {
51 + blocks.push(lastBlock = {skip: obj.offset - offset, length: 0})
52 + offset = obj.offset
53 + } else if (obj.offset < offset) {
54 + return cb(new Error('bad offset'))
55 + }
56 + var len = obj.next ? obj.next.offset - obj.offset : Infinity
57 + lastBlock.length += len
58 + offset += len
59 + }
60 + pack.read = pull(
61 + pack.read,
62 + skipFooter(20),
63 + blockFilter(pull.values(blocks))
64 + )
65 + cb()
66 + }))
67 + }, function (err) {
68 + cb(err, numObjects)
7969 })
80- function readRest(end, cb) {
81- var buf = dataBuf
82- if (end || buf == null) read(end, cb)
83- else dataBuf = null, cb(null, buf)
84- }
8570 }
8671
87-function getNumObjects(packs, cb) {
88- reduceAsync(packs, function (pack, num, cb) {
89- if (pack.numObjects != null) {
90- pack.read = pull(pack.read, skipHeader(12), skipFooter(20))
91- cb(null, num + pack.numObjects)
92- } else {
93- readHeader(pack.read, 12, function (err, header, readRest) {
94- if (err === true) return cb(new Error('Missing header'))
95- if (err) return cb(err)
96- pack.numObjects = header.readUInt32BE(8)
97- pack.read = skipFooter(20)(readRest)
98- cb(null, num + pack.numObjects)
99- })
100- }
101- }, 0, cb)
102-}
103-
10472 function closePacks(packs, cb) {
10573 forEachAsync(packs, function (pack, cb) {
10674 pack.read(true, cb)
10775 }, cb)
10876 }
10977
11078 module.exports = function concatPacks(packs) {
111- /* packs: [{read: source, numObjects: int}] */
79 + /* packs: [{read: source, readIdx: source}] */
11280 if (packs.length === 1) return packs[0].read
11381
11482 var checksum = crypto.createHash('sha1')
11583 var packI = 0
@@ -118,17 +86,17 @@
11886 return function next(end, cb) {
11987 switch (state) {
12088 case 'begin':
12189 if (end) return closePacks(cb)
122- return getNumObjects(packs, function (err, numObjects) {
90 + return dedupPacks(packs, function (err, numObjects) {
12391 if (err) return cb(err)
124- state = 'startpack'
12592 var header = packHeader(numObjects)
12693 checksum.update(header)
94 + state = 'payload'
12795 cb(null, header)
12896 })
12997
130- case 'startpack':
98 + case 'payload':
13199 if (end) return closePacks(cb)
132100 if (packI >= packs.length) {
133101 state = 'end'
134102 return cb(null, checksum.digest())
package.json View
@@ -11,10 +11,12 @@
1111 ],
1212 "author": "Charles Lehner (http://celehner.com/)",
1313 "license": "Fair",
1414 "dependencies": {
15 + "git-packidx-parser": "^1.0.0",
1516 "looper": "^3.0.0",
1617 "multicb": "^1.2.1",
18 + "pull-block-filter": "^1.0.0",
1719 "pull-buffered": "^0.3.0",
1820 "pull-cat": "^1.1.8",
1921 "pull-skip-footer": "^0.1.0",
2022 "pull-stream": "^3.1.0"

Built with git-ssb-web