git ssb

0+

cel / pull-git-pack-concat



Commit 4b7c03d1abcfea9ae3f915c717c97174399dcc1a

Use pack-indexes to deduplicate packs

Charles Lehner committed on 10/14/2016, 5:21:25 AM
Parent: b2606aaae852e50f39ef35d8630a33ac2a744e71

Files changed

index.js changed
package.json changed
index.js View
@@ -4,8 +4,10 @@
44 var buffered = require('pull-buffered')
55 var multicb = require('multicb')
66 var crypto = require('crypto')
77 var skipFooter = require('pull-skip-footer')
8 +var packidx = require('pull-git-packidx-parser')
9 +var blockFilter = require('pull-block-filter')
810
911 function packHeader(numObjects) {
1012 var header = new Buffer(12)
1113 header.write('PACK')
@@ -24,21 +26,8 @@
2426 })
2527 })
2628 }
2729
28-function reduceAsync(arr, fn, init, cb) {
29- var i = 0
30- var acc = init
31- loop(function (next) {
32- if (i >= arr.length) return cb(null, acc)
33- fn(arr[i++], acc, function (err, data) {
34- if (err) return cb(err)
35- acc = data
36- next()
37- })
38- })
39-}
40-
4130 function skipHeader(len) {
4231 return function (read) {
4332 return function (end, cb) {
4433 if (end || len <= 0) read(end, cb)
@@ -52,54 +41,49 @@
5241 }
5342 }
5443 }
5544
56-function readHeader(read, len, cb) {
57- var headerBufs = []
58- var dataBuf
59- read(null, function next(end, data) {
60- if (end) return cb(end === true ? new Error('Missing header') : err)
61- if (data.length > len) {
62- // got more than enough for header
63- headerBufs.push(data.slice(0, len))
64- var header = Buffer.concat(headerBufs)
65- headerBufs = null
66- dataBuf = data.slice(len)
67- cb(null, header, readRest)
68- } else if (data.length === len) {
69- // got enough for header
70- headerBufs.push(data)
71- var header = Buffer.concat(headerBufs)
72- headerBufs = null
73- cb(null, header, read)
74- } else {
75- len -= data.length
76- headerBufs.push(data)
77- read(null, next)
78- }
79- })
80- function readRest(end, cb) {
81- var buf = dataBuf
82- if (end || buf == null) read(end, cb)
83- else dataBuf = null, cb(null, buf)
84- }
45 +function compareByOffset(a, b) {
46 + return a.offset - b.offset
8547 }
8648
87-function getNumObjects(packs, cb) {
88- reduceAsync(packs, function (pack, num, cb) {
89- if (pack.numObjects != null) {
90- pack.read = pull(pack.read, skipHeader(12), skipFooter(20))
91- cb(null, num + pack.numObjects)
92- } else {
93- readHeader(pack.read, 12, function (err, header, readRest) {
94- if (err === true) return cb(new Error('Missing header'))
95- if (err) return cb(err)
96- pack.numObjects = header.readUInt32BE(8)
97- pack.read = skipFooter(20)(readRest)
98- cb(null, num + pack.numObjects)
99- })
100- }
101- }, 0, cb)
49 +function dedupPacks(packs, cb) {
50 + var seen = {}
51 + var numObjects = 0
52 + forEachAsync(packs, function (pack, cb) {
53 + return pull(pack.readIdx, packidx(function (err, idx) {
54 + if (err) return cb(err)
55 + var blocks = []
56 + var lastBlock
57 + offset = 0
58 + var objs = idx.objects.sort(compareByOffset)
59 + for (var i = 0; i < objs.length; i++) {
60 + var obj = objs[i]
61 + var id = obj.oid.toString('hex')
62 + if (id === '00947e10295e018fc71cf9c264ea5f341260f9b2') throw 1
63 + if (seen[id]) continue
64 + seen[id] = true
65 + numObjects++
66 + if (obj.offset > offset) {
67 + blocks.push(lastBlock = {skip: obj.offset - offset, length: 0})
68 + offset = obj.offset
69 + } else if (obj.offset < offset) {
70 + return cb(new Error('bad offset'))
71 + }
72 + var len = obj.next ? obj.next.offset - obj.offset : Infinity
73 + lastBlock.length += len
74 + offset += len
75 + }
76 + pack.read = pull(
77 + pack.read,
78 + skipFooter(20),
79 + blockFilter(pull.values(blocks))
80 + )
81 + cb()
82 + }))
83 + }, function (err) {
84 + cb(err, numObjects)
85 + })
10286 }
10387
10488 function closePacks(packs, cb) {
10589 forEachAsync(packs, function (pack, cb) {
@@ -107,9 +91,9 @@
10791 }, cb)
10892 }
10993
11094 module.exports = function concatPacks(packs) {
111- /* packs: [{read: source, numObjects: int}] */
95 + /* packs: [{read: source, readIdx: source}] */
11296 if (packs.length === 1) return packs[0].read
11397
11498 var checksum = crypto.createHash('sha1')
11599 var packI = 0
@@ -118,17 +102,17 @@
118102 return function next(end, cb) {
119103 switch (state) {
120104 case 'begin':
121105 if (end) return closePacks(cb)
122- return getNumObjects(packs, function (err, numObjects) {
106 + return dedupPacks(packs, function (err, numObjects) {
123107 if (err) return cb(err)
124- state = 'startpack'
125108 var header = packHeader(numObjects)
126109 checksum.update(header)
110 + state = 'payload'
127111 cb(null, header)
128112 })
129113
130- case 'startpack':
114 + case 'payload':
131115 if (end) return closePacks(cb)
132116 if (packI >= packs.length) {
133117 state = 'end'
134118 return cb(null, checksum.digest())
package.json View
@@ -11,10 +11,12 @@
1111 ],
1212 "author": "Charles Lehner (http://celehner.com/)",
1313 "license": "Fair",
1414 "dependencies": {
15 + "git-packidx-parser": "^1.0.0",
1516 "looper": "^3.0.0",
1617 "multicb": "^1.2.1",
18 + "pull-block-filter": "^1.0.0",
1719 "pull-buffered": "^0.3.0",
1820 "pull-cat": "^1.1.8",
1921 "pull-skip-footer": "^0.1.0",
2022 "pull-stream": "^3.1.0"

Built with git-ssb-web