bin.js
var fs = require('fs')
var path = require('path')
var URL = require('url')
var http = require('http')
var https = require('https')
var crypto = require('crypto')
var readline = require('readline')
var os = require('os')

var ssbClient = require('ssb-client')
var pull = require('pull-stream')

var pkg = require('./package')

var userAgentBase = pkg.name + '/' + pkg.version
var userAgentContact
var userAgentBot = false

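// Estimate how large a message with the given content would be once
// published, by wrapping it in a dummy envelope (placeholder key, author,
// sequence and signature) and measuring the pretty-printed JSON. Used below
// to keep each drafted message under the 8192-byte size check.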
function estimateMessageSize(content) {
  var draftMsg = {
    key: '%0000000000000000000000000000000000000000000=.sha256',
    value: {
      previous: '%0000000000000000000000000000000000000000000=.sha256',
      author: '@0000000000000000000000000000000000000000000=.ed25519',
      sequence: 100000,
      timestamp: 1000000000000.0001,
      hash: 'sha256',
      content: content,
      signature: '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000==.sig.ed25519'
    }
  }
  return JSON.stringify(draftMsg, null, 2).length
}

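// Pull-stream through-helper that collects items into a queue and keeps
// reading while fn(queue) returns truthy. Not referenced elsewhere in this
// file.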
function mapCollect(fn) {
  var aborted
  return function (read) {
    var queue = []
    return function (abort, cb) {
      if (aborted = abort) return read(abort, cb)
      read(null, function next(end, data) {
        if (end) return cb(end)
        queue.push(data)
        var result = fn(queue)
        if (result) read(null, next)
      })
    }
  }
}

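// Fetch a URL over HTTP(S) and parse the response body as JSON. Sends a
// descriptive User-Agent (package name/version, optional contact address and
// bot flag), as Wikimedia's API guidelines ask for.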
function getJson(url, cb) {
  var opts = URL.parse(url)
  opts.headers = {
    'User-Agent': userAgentBase
      + (userAgentContact ? ' (' + userAgentContact + ')' : '')
      + (userAgentBot ? ' bot' : '')
  }
  var h = opts.protocol === 'https:' ? https : http
  h.get(opts, function (res) {
    if (res.statusCode !== 200) return cb(new Error('HTTP ' + res.statusCode + ' ' + res.statusMessage))
    var bufs = []
    res.on('data', function (buf) {
      bufs.push(buf)
    })
    res.on('end', function () {
      res.removeListener('error', cb)
      var buf = Buffer.concat(bufs)
      bufs = null
      var data
      try {
        data = JSON.parse(buf.toString('utf8'))
      } catch (e) {
        return cb(e)
      }
      cb(null, data)
    })
    res.on('error', cb)
  })
}

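// Publish the prepared drafts in order. Draft ids embedded in later drafts
// (e.g. in the parents field) are replaced with the real message keys of the
// drafts published before them.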
function publishDrafts(sbot, drafts, cb) {
  var draftIdIndex = {}
  drafts.forEach(function (draft, i) {
    draftIdIndex[draft.draftId] = i
  })
  var ids = []

  function replaceDraftIds(obj) {
    if (typeof obj === 'string') {
      var i = draftIdIndex[obj]
      if (typeof i === 'number') {
        var id = ids[i]
        if (!id) throw new ReferenceError('draft references unknown message')
        return id
      }
    } else if (Array.isArray(obj)) {
      return obj.map(replaceDraftIds)
    } else if (obj !== null && typeof obj === 'object') {
      var o = {}
      for (var k in obj) o[k] = replaceDraftIds(obj[k])
      return o
    }
    return obj
  }

  pull(
    pull.values(drafts),
    pull.asyncMap(function (draft, cb) {
      var content = replaceDraftIds(draft.content)
      sbot.publish(content, function (err, msg) {
        if (err) return cb(err)
        ids.push(msg.key)
        cb(null, msg)
      })
    }),
    pull.collect(cb)
  )
}

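// Command-line arguments: -n (dry run: print drafts instead of publishing),
// -y (publish without asking for confirmation), -h (print usage); any other
// arguments are page URLs to sync.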
var args = process.argv.slice(2)
var yes = false
var dry = false
var help = false
var urls = []
args.forEach(function (arg) {
  if (arg[0] === '-') switch (arg) {
    case '-n': return dry = true
    case '-y': return yes = true
    case '-h': return help = true
    default: throw 'Unknown argument: ' + arg
  } else urls.push(arg)
})

if (help) {
  process.stdout.write(fs.readFileSync(path.join(__dirname, 'usage.txt')))
  process.exit(0)
}

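// Connect to the local ssb server. Options are read from the `wikimedia`
// section of the ssb config; if no page URLs were given on the command line,
// they are read from wikimedia-pages.txt in the ssb config directory.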
ssbClient(function (err, sbot, config) {
  if (err) throw err
  var conf = config.wikimedia || {}
  userAgentContact = conf.contact
  userAgentBot = conf.bot

  if (urls.length === 0) {
    var pagesFile = path.join(config.path, 'wikimedia-pages.txt')
    var pagesData = fs.readFileSync(pagesFile, 'utf8')
    urls = pagesData.split('\n').filter(RegExp.prototype.test.bind(/[^#]/))
    if (!urls.length) {
      console.log('No pages to sync.')
      return sbot.close()
    }
  }

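  // Split each page URL into wiki site, API endpoint and page title, and
  // group the pages by API endpoint.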
  var pagesInfo = urls.map(function (page) {
    var m = /^(.*?)\/wiki\/(.*)$/.exec(page)
    if (!m) throw 'Unable to parse page URL ' + page
    return {
      site: m[1] + '/',
      api: m[1] + '/w/api.php',
      title: m[2]
    }
  })
  var pagesInfoByApi = {}
  pagesInfo.forEach(function (pageInfo) {
    var infos = pagesInfoByApi[pageInfo.api] || (pagesInfoByApi[pageInfo.api] = [])
    infos.push(pageInfo)
  })
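  // Ask each wiki's API to normalize the requested titles (one query per
  // API, titles joined with U+001F, the MediaWiki API's alternative
  // multi-value separator), so later lookups match the canonical titles.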
  console.log('Normalizing titles...')
  var waiting = 0
  for (var api in pagesInfoByApi) (function (api) {
    var pagesInfoForApi = pagesInfoByApi[api]
    var pagesInfoForApiByTitle = {}
    var titles = pagesInfoForApi.map(function (info) {
      pagesInfoForApiByTitle[info.title] = info
      return info.title
    })
    var url = api + '?format=json&action=query' +
      '&titles=' + encodeURIComponent('\x1f' + titles.join('\x1f'))
    waiting++
    getJson(url, function (err, data) {
      if (err) throw err
      if (data.warnings) console.trace('Warnings:', data.warnings)
      if (data.query.normalized) data.query.normalized.forEach(function (norm) {
        var info = pagesInfoForApiByTitle[norm.from]
        if (!info) {
          console.error(JSON.stringify({titles: titles, response: data}, null, 2))
          throw new Error('Unexpected title in server response')
        }
        // console.log('Normalized title', norm.from, norm.to)
        info.title = norm.to
      })
      if (!--waiting) next()
    })
  }(api))

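  // Once titles are normalized: for each page, find the latest revision
  // already published to ssb, fetch any newer revisions from the wiki,
  // store their content as blobs, and pack them into draft messages.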
  function next() {
    console.log('Getting revisions...')
    var userHashes = {}
    pull(
      pull.values(pagesInfo),
      pull.asyncMap(function (pageInfo, cb) {
        // Calculate blob id for page URL + title, for linking
        pull(
          pull.once(pageInfo.site + '\t' + pageInfo.title),
          sbot.blobs.add(function (err, hash) {
            pageInfo.hash = hash
            cb(null, pageInfo)
          })
        )
      }),
      pull.asyncMap(function (pageInfo, cb) {
        // Get previous messages for this page.
        // Simple solution: find the revision with latest timestamp.
        var maxRevTs = ''
        var maxRevMsgId
        pull(
          sbot.links({
            dest: pageInfo.hash,
            rel: 'pageId',
            values: true,
            meta: false
          }),
          pull.filter(function (msg) {
            var c = msg && msg.value && msg.value.content
            return c
              && c.type === 'wikimedia/revisions'
              && c.site === pageInfo.site
              && c.title === pageInfo.title
          }),
          pull.drain(function (msg) {
            var c = msg && msg.value && msg.value.content
            var revs = Array.isArray(c.revisions) && c.revisions
            if (revs) revs.forEach(function (rev) {
              if (rev && rev.timestamp > maxRevTs) {
                maxRevTs = rev.timestamp
                maxRevMsgId = msg.key
              }
            })
          }, function (err) {
            if (err) return cb(err)
            pageInfo.latestMsgId = maxRevMsgId
            pageInfo.latestRevTs = maxRevTs
            cb(null, pageInfo)
          })
        )
      }),
      pull.map(function (pageInfo) {
        // Get new revisions.
        var rvcontinue, rvdone
        var rvstart = pageInfo.latestRevTs
        var prevId = pageInfo.latestMsgId
        var aborted
        var revisions = pull(
          function (abort, cb) {
            if (aborted = abort) return cb(abort)
            if (rvdone) return cb(true)
            console.log('Getting revisions for', pageInfo.title + '...',
              rvstart || '', rvcontinue || '')
            var url = pageInfo.api + '?format=json&action=query&prop=revisions&rvslots=*'
              + '&titles=' + encodeURIComponent(pageInfo.title)
              + '&rvprop=ids|timestamp|comment|user|slotsha1|slotsize|content|roles|flags|tags'
              + '&rvdir=newer'
              + (rvcontinue ? '&rvcontinue=' + rvcontinue : '')
              + (rvstart ? '&rvstart=' + rvstart : '')
              + '&rvlimit=50'
            getJson(url, function (err, data) {
              if (aborted) return err && console.trace(err)
              if (err) return cb(err)
              if (data.warnings) console.trace('Warnings:', data.warnings)
              rvcontinue = data.continue && data.continue.rvcontinue
              if (!rvcontinue) rvdone = true
              var page
              if (data.query) for (var pageid in data.query.pages) {
                page = data.query.pages[pageid]
                if (page.title === pageInfo.title) break
                else page = null
              }
              if (!page) {
                console.trace(data.query.pages, pageInfo)
                return cb(new Error('Unable to find page'))
              }
              var revs = page.revisions
              if (!revs) {
                console.trace(page, pageInfo)
                return cb(new Error('Unable to get revisions'))
              }
              console.log('Got ' + page.revisions.length + ' revisions')
              cb(null, page.revisions)
            })
          },
          pull.flatten(),
          pull.asyncMap(function (rev, cb) {
            // Calculate blob id for user page URL + title, for linking
            var hash = userHashes[rev.user]
            if (hash) {
              rev.userId = hash
              return cb(null, rev)
            }
            pull(
              pull.once(pageInfo.site + '\tUser:' + rev.user),
              sbot.blobs.add(function (err, hash) {
                rev.userId = userHashes[rev.user] = hash
                cb(null, rev)
              })
            )
          }),
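          // Store each slot's content as a blob, after checking its sha1
          // against the value reported by the API, and replace the inline
          // content with a link to the blob.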
          pull.asyncMap(function (rev, cb) {
            var waiting = 0
            for (var slot in rev.slots) (function (slot) {
              waiting++
              var slotInfo = rev.slots[slot]
              var content = slotInfo['*']
              if (!content) {
                console.trace(slotInfo)
                return cb(new Error('Missing content'))
              }
              var sha1 = crypto.createHash('sha1').update(content).digest('hex')
              if (sha1 !== slotInfo.sha1) {
                console.trace(slotInfo, sha1)
                return cb(new Error('Mismatched content sha1'))
              }
              pull(
                pull.once(content),
                sbot.blobs.add(function (err, hash) {
                  if (err) return cb(err)
                  slotInfo.link = hash
                  delete slotInfo['*']
                  if (!--waiting) cb(null, rev)
                })
              )
            }(slot))
          })
        )

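        // Pack revisions into message contents, starting a new draft whenever
        // the estimated message size would exceed 8192 bytes. Each draft
        // links to the previous message (or draft) for this page via parents.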
        var queuedRevisions = []
        var ended
        function cbDraft(content, cb) {
          if (!content.revisions.length) {
            console.log('No revisions for', pageInfo.title)
            return cb(true)
          }
          console.log('Prepared a message',
            'with', content.revisions.length, 'revisions',
            'for', pageInfo.title)
          prevId = '%' + crypto.createHash('sha256').update(JSON.stringify(content)).digest('base64') + '.draft6'
          cb(null, {
            draftId: prevId,
            content: content
          })
        }
        return function (abort, cb) {
          if (abort) return revisions(abort, cb)
          if (ended) return cb(true)
          var content = {
            type: 'wikimedia/revisions',
            site: pageInfo.site,
            title: pageInfo.title,
            pageId: pageInfo.hash,
            parents: prevId ? [prevId] : undefined,
            revisions: queuedRevisions.splice(0)
          }
          revisions(null, function next(end, revision) {
            if (end) {
              ended = end
              // propagate stream errors
              if (end !== true) return cb(end)
              return cbDraft(content, cb)
            }
            content.revisions.push(revision)
            if (estimateMessageSize(content) > 8192) {
              queuedRevisions.push(content.revisions.pop())
              // console.log('filled msg for ', pageInfo.title, ' with ', content.revisions.length, 'revisions')
              return cbDraft(content, cb)
            }
            revisions(null, next)
          })
        }
      }),
      pull.flatten(),
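      // Collect all drafts, then either print them (-n), or ask for
      // confirmation (unless -y) and publish them.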
      pull.collect(function (err, drafts) {
        if (err) throw err
        if (dry) {
          console.log(JSON.stringify(drafts, null, 2))
          return sbot.close()
        }
        if (yes) return confirmed(true)
        var rl = readline.createInterface({
          input: process.stdin,
          output: process.stdout
        })
        rl.question('Publish ' + drafts.length + ' messages? [Y/n] ', function (answer) {
          rl.close()
          confirmed(!/^n/i.test(answer))
        })
        function confirmed(yes) {
          if (!yes) return sbot.close()
          publishDrafts(sbot, drafts, function (err, msgs) {
            if (err) throw err
            console.log('Published:\n' + msgs.map(function (msg) {
              return msg.key
            }).join('\n'))
            sbot.close()
          })
        }
      })
    )
  }
})