Files: f026a5e330ff1154aa9bc4c080cf82ff95b4d63d / bin.js
16126 bytesRaw
1 | |
2 | |
3 | var fs = require('fs') |
4 | var path = require('path') |
5 | var URL = require('url') |
6 | var http = require('http') |
7 | var https = require('https') |
8 | var crypto = require('crypto') |
9 | var readline = require('readline') |
10 | var os = require('os') |
11 | |
12 | var ssbClient = require('ssb-client') |
13 | var pull = require('pull-stream') |
14 | |
15 | var pkg = require('./package') |
16 | |
// User-Agent pieces sent with every HTTP request to the wiki servers.
// Base is "<pkg-name>/<pkg-version>"; the contact string and bot flag are
// filled in later from the ssb config (config.wikimedia) before requests run.
var userAgentBase = pkg.name + '/' + pkg.version
var userAgentContact
var userAgentBot = false
20 | |
// Estimate the serialized size of an ssb message that would carry `content`.
// Wraps the content in a dummy message envelope whose fields (key, previous,
// author, signature, sequence, timestamp) are placeholder values of realistic
// width, then measures the pretty-printed JSON length.
function estimateMessageSize(content) {
  var placeholderMsgRef = '%0000000000000000000000000000000000000000000=.sha256'
  var placeholderAuthor = '@0000000000000000000000000000000000000000000=.ed25519'
  var placeholderSig = '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000==.sig.ed25519'
  var envelope = {
    key: placeholderMsgRef,
    value: {
      previous: placeholderMsgRef,
      author: placeholderAuthor,
      sequence: 100000,
      timestamp: 1000000000000.0001,
      hash: 'sha256',
      content: content,
      signature: placeholderSig
    }
  }
  return JSON.stringify(envelope, null, 2).length
}
36 | |
// Fetch `url` over http or https and call cb(err, bodyString).
// Sends a descriptive User-Agent (package name/version, configured contact
// address, and bot flag), per MediaWiki API etiquette.
// Non-200 responses are reported as errors after logging the headers.
function get(url, cb) {
  var opts = URL.parse(url)
  opts.headers = {
    'User-Agent': userAgentBase
      + (userAgentContact ? ' (' + userAgentContact + ')' : '')
      + (userAgentBot ? ' bot' : '')
  }
  var h = opts.protocol === 'https:' ? https : http
  h.get(opts, function (res) {
    if (res.statusCode !== 200) {
      console.error(res.headers, url)
      return cb(new Error('HTTP ' + res.statusCode + ' ' + res.statusMessage))
    }
    var bufs = []
    res.on('data', function (buf) {
      bufs.push(buf)
    })
    res.on('end', function () {
      // Avoid a double callback if an 'error' fires after 'end'.
      res.removeListener('error', cb)
      var str
      try {
        str = Buffer.concat(bufs).toString('utf8')
      } catch(e) {
        return cb(e)
      }
      cb(null, str)
    })
    res.on('error', cb)
  }).on('error', cb) // fix: report request errors (DNS failure, connection
                     // refused/reset) to the callback instead of crashing
                     // the process with an uncaught 'error' event
}
67 | |
// Fetch `url` and parse the response body as JSON; cb(err, parsedValue).
function getJson(url, cb) {
  get(url, function (err, str) {
    // fix: forward transport errors directly instead of letting
    // JSON.parse(undefined) mask them as a generic SyntaxError
    if (err) return cb(err)
    var data
    try {
      data = JSON.parse(str)
    } catch(e) {
      return cb(e)
    }
    cb(null, data)
  })
}
79 | |
// Publish draft messages to sbot in order, rewriting any draft-id strings
// inside each message's content to the real ssb message keys assigned as
// earlier drafts get published. cb(err, publishedMsgs).
function publishDrafts(sbot, drafts, cb) {
  // Map each draftId to its position in the drafts array.
  var draftIdIndex = {}
  drafts.forEach(function (draft, i) {
    draftIdIndex[draft.draftId] = i
  })
  // Real message keys in publish order; ids[i] is the key for drafts[i].
  var ids = []

  // Deep-walk a content value, replacing draft-id strings with the keys of
  // already-published drafts. Throws on a forward reference (a draft that
  // points at a message not yet published).
  function replaceDraftIds(obj) {
    if (typeof obj === 'string') {
      var i = draftIdIndex[obj]
      if (typeof i === 'number') {
        var id = ids[i]
        // fix: corrected typo in error message ("referernces")
        if (!id) throw new ReferenceError('draft references unknown message')
        return id
      }
    } else if (Array.isArray(obj)) {
      return obj.map(replaceDraftIds)
    } else if (obj !== null && typeof obj === 'object') {
      var o = {}
      for (var k in obj) o[k] = replaceDraftIds(obj[k])
      return o
    }
    return obj
  }

  // asyncMap publishes sequentially, so later drafts can safely reference
  // the keys of earlier ones.
  pull(
    pull.values(drafts),
    pull.asyncMap(function (draft, cb) {
      var content = replaceDraftIds(draft.content)
      sbot.publish(content, function (err, msg) {
        if (err) return cb(err)
        ids.push(msg.key)
        cb(null, msg)
      })
    }),
    pull.collect(cb)
  )
}
118 | |
// --- CLI argument parsing ---
// Flags: -n (dry run: print drafts, don't publish), -y (skip confirmation),
// -h (print usage and exit). Any other dash-argument is an error; bare
// arguments are wiki page URLs to sync.
var args = process.argv.slice(2)
var yes = false
var dry = false
var help = false
var urls = []
args.forEach(function (arg) {
  if (arg[0] === '-') switch (arg) {
    case '-n': return dry = true
    case '-y': return yes = true
    case '-h': return help = true
    // fix: throw an Error object instead of a bare string (string throws
    // carry no stack trace and break instanceof Error checks)
    default: throw new Error('Unknown argument: ' + arg)
  } else urls.push(arg)
})

if (help) {
  process.stdout.write(fs.readFileSync(path.join(__dirname, 'usage.txt')))
  process.exit(0)
}
137 | |
138 | var apiHrefRe = /(?:>api\.php<|<link rel="EditURI").* href="([^"]+\/api\.php)(?:[?#][^"]*)?".*/ |
139 | |
// Main entry: connect to the local ssb server, resolve which wiki pages to
// sync (from argv, else from wikimedia-pages.txt in the ssb config dir),
// then run a three-stage async pipeline:
//   findApis() -> normalizeTitles() -> getRevisions()
ssbClient(function (err, sbot, config) {
  if (err) throw err
  var conf = config.wikimedia || {}
  userAgentContact = conf.contact
  userAgentBot = conf.bot

  // No URLs on the command line: fall back to the pages file.
  // Lines starting with "#" are treated as comments and skipped.
  if (urls.length === 0) {
    var pagesFile = path.join(config.path, 'wikimedia-pages.txt')
    var pagesData = fs.readFileSync(pagesFile, 'utf8')
    urls = pagesData.split('\n').filter(RegExp.prototype.test.bind(/^[^#]/))
    if (!urls.length) {
      console.log('No pages to sync.')
      return sbot.close()
    }
  }

  // Parse each page URL into { url, site, wikiBase, title }.
  var siteWikiBases = {}
  var pagesInfo = urls.map(function (page) {
    // Note: this assumes the wiki is either at / or /wiki/
    var m = /^(https?:\/\/.*?\/)(wiki\/)?(.*)$/.exec(page)
    if (!m) throw 'Unable to parse page URL ' + page
    return {
      url: page,
      site: m[1],
      wikiBase: m[1] + (m[2] || ''),
      title: m[3]
    }
  })
  // Group page info by site (wiki base url)
  var pagesInfoBySite = {}
  pagesInfo.forEach(function (pageInfo) {
    var infos = pagesInfoBySite[pageInfo.site]
      || (pagesInfoBySite[pageInfo.site] = [])
    infos.push(pageInfo)
    siteWikiBases[pageInfo.site] = pageInfo.wikiBase
  })

  var apiBySite = {}
  findApis()

  // Stage 1: discover each site's api.php endpoint by scraping its
  // Special:Version page, then continue to normalizeTitles() once all
  // sites have responded (counted via `waiting`).
  function findApis() {
    console.log('Finding Wikimedia APIs...')
    // Some possible API locations:
    // /mediawiki/api.php
    // /wiki/api.php
    // /w/api.php
    // /api.php
    // TODO: do this with less HTTP requests, or cache the results across runs
    var waiting = 0
    for (var site in pagesInfoBySite) (function (site) {
      waiting++
      var base = siteWikiBases[site]
      var url = base + 'Special:Version'
      get(url, function (err, html) {
        if (err) throw err
        var m = apiHrefRe.exec(html)
        if (!m) throw new Error('Unable to find api.php for ' + site)
        var api = URL.resolve(url, m[1])
        apiBySite[site] = api
        if (!--waiting) normalizeTitles()
      })
    }(site))
  }

  // Stage 2: ask each wiki to normalize the requested titles (canonical
  // capitalization/namespace form), batching all of a site's titles into
  // one query (separated by \x1f), then continue to getRevisions().
  function normalizeTitles() {
    console.log('Normalizing titles...')
    var waiting = 0
    for (var site in pagesInfoBySite) (function (site) {
      var pagesInfoForSite = pagesInfoBySite[site]
      var pagesInfoForSiteByTitle = {}
      var titles = pagesInfoForSite.map(function (info) {
        pagesInfoForSiteByTitle[info.title] = info
        return info.title
      })
      var api = apiBySite[site]
      var url = api
        + '?format=json&action=query'
        + '&titles=' + encodeURIComponent('\x1f' + titles.join('\x1f'))
        + '&' // trailing & needed for some reason
      waiting++
      getJson(url, function (err, data) {
        if (err) throw err
        if (data.warnings) console.trace('Warnings:', data.warnings)
        if (data.query.normalized) data.query.normalized.forEach(function (norm) {
          var info = pagesInfoForSiteByTitle[norm.from]
          if (!info) {
            console.error(JSON.stringify({titles: titles, response: data}, 0, 2))
            throw new Error('Unexpected title in server response')
          }
          // console.log('Normalized title', norm.from, norm.to)
          info.title = norm.to
        })
        if (!--waiting) getRevisions()
      })
    }(site))
  }

  // Stage 3: for each page, compute its blob-id handle, find the latest
  // revision already published on ssb, fetch newer revisions from the wiki
  // API, store revision content as blobs, pack revisions into draft
  // messages (kept under the ssb message size limit), and finally publish
  // after user confirmation.
  function getRevisions() {
    console.log('Getting revisions...')
    // Cache of user-page blob hashes, keyed by wiki username.
    var userHashes = {}
    pull(
      pull.values(pagesInfo),
      pull.asyncMap(function (pageInfo, cb) {
        // Calculate blob id for page URL + title, for linking
        pull(
          pull.once(pageInfo.site + '\t' + pageInfo.title),
          sbot.blobs.add(function (err, hash) {
            // NOTE(review): err is ignored here; a blobs.add failure would
            // leave pageInfo.hash undefined — confirm intended.
            pageInfo.hash = hash
            cb(null, pageInfo)
          })
        )
      }),
      pull.asyncMap(function (pageInfo, cb) {
        // Get previous messages for this page.
        // Simple solution: find the revision with latest timestamp.
        var maxRevTs = ''
        var maxRevMsgId
        pull(
          sbot.links({
            dest: pageInfo.hash,
            rel: 'pageId',
            values: true,
            meta: false
          }),
          pull.filter(function (msg) {
            var c = msg && msg.value && msg.value.content
            return c
              && c.type === 'wikimedia/revisions'
              && c.site === pageInfo.site
              && c.title === pageInfo.title
          }),
          pull.drain(function (msg) {
            var c = msg && msg.value && msg.value.content
            var revs = Array.isArray(c.revisions) && c.revisions
            if (revs) revs.forEach(function (rev) {
              // Timestamps compare as strings (ISO 8601 sorts lexically).
              if (rev && rev.timestamp > maxRevTs) {
                maxRevTs = rev.timestamp
                maxRevMsgId = msg.key
              }
            })
          }, function (err) {
            if (err) return cb(err)
            pageInfo.latestMsgId = maxRevMsgId
            pageInfo.latestRevTs = maxRevTs
            cb(null, pageInfo)
          })
        )
      }),
      pull.map(function (pageInfo) {
        // Get new revisions.
        var rvcontinue, rvdone
        var rvstart = pageInfo.latestRevTs
        var prevId = pageInfo.latestMsgId
        var aborted
        var first = true
        // Source stream of individual revisions: fetched from the API in
        // batches of up to 50, resuming after the last-synced timestamp,
        // normalized to slots form, with content stored as blobs.
        var revisions = pull(
          function (abort, cb) {
            // note: intentional assignment — remember the abort signal
            if (aborted = abort) return cb(abort)
            if (rvdone) return cb(true)

            console.log('Getting revisions for', pageInfo.title + '...',
              rvstart || '', rvcontinue || '')
            var url = apiBySite[pageInfo.site]
              + '?format=json&action=query&prop=revisions&rvslots=*'
              + '&titles=' + encodeURIComponent(pageInfo.title)
              + '&rvprop=ids|timestamp|comment|user|sha1|size|slotsha1|slotsize|content|roles|flags|tags'
              + '&rvdir=newer'
              + (rvcontinue ? '&rvcontinue=' + rvcontinue : '')
              + (rvstart ? '&rvstart=' + rvstart : '')
              + '&rvlimit=50'
            getJson(url, function (err, data) {
              if (aborted) return err && console.trace(err)
              if (err) return cb(err)

              // Suppress the two known warnings emitted by pre-slots
              // (older MediaWiki) servers; surface anything else.
              var warnings = data.warnings
              if (warnings) {
                if (warnings.main) {
                  if (warnings.main['*'] === 'Unrecognized parameter: rvslots.') {
                    delete warnings.main['*']
                    if (Object.keys(warnings.main).length === 0) {
                      delete warnings.main
                    }
                  }
                }
                if (warnings.revisions) {
                  if (warnings.revisions['*'] === 'Unrecognized values for parameter "rvprop": slotsha1, slotsize, roles.') {
                    delete warnings.revisions['*']
                    if (Object.keys(warnings.revisions).length === 0) {
                      delete warnings.revisions
                    }
                  }
                }
                if (Object.keys(warnings).length > 0) {
                  console.trace('Warnings:', warnings)
                }
              }

              rvcontinue = data.continue && data.continue.rvcontinue
              if (!rvcontinue) rvdone = true
              // The response keys pages by numeric pageid; find ours by title.
              var page
              if (data.query) for (var pageid in data.query.pages) {
                page = data.query.pages[pageid]
                if (page.title === pageInfo.title) break
                else page = null
              }
              if (!page) {
                console.trace(data.query.pages, pageInfo)
                return cb(new Error('Unable to find page'))
              }
              var revs = page.revisions || []
              console.log('Got ' + revs.length + ' revisions')
              cb(null, revs)
            })
          },
          pull.flatten(),

          // rvstart is inclusive, so the first batch starts with the
          // already-synced revision — drop it exactly once.
          pull.filter(function (rev) {
            if (rev.timestamp === rvstart && first) {
              first = false
              return false
            }
            return true
          }),

          pull.through(function (rev) {
            if (!rev.slots) {
              // old API does not use slots.
              // Transform result to be forward-compatible.
              rev.slots = {
                main: {
                  size: rev.size,
                  sha1: rev.sha1,
                  contentmodel: rev.contentmodel,
                  contentformat: rev.contentformat,
                  '*': rev['*']
                }
              }
              delete rev.contentmodel
              delete rev.contentformat
              delete rev['*']
            }
            // duplicate values supplied in new API in slotsize and slotsha1
            delete rev.sha1
            delete rev.size
          }),

          pull.asyncMap(function (rev, cb) {
            // Calculate blob id for user page URL + title, for linking
            var hash = userHashes[rev.user]
            if (hash) {
              rev.userId = hash
              return cb(null, rev)
            }
            pull(
              pull.once(pageInfo.site + '\tUser:' + rev.user),
              sbot.blobs.add(function (err, hash) {
                // NOTE(review): err is ignored here — confirm intended.
                rev.userId = userHashes[rev.user] = hash
                cb(null, rev)
              })
            )
          }),

          pull.asyncMap(function (rev, cb) {
            // Store each slot's content as a blob, after verifying its sha1
            // against the value reported by the API, then replace the inline
            // content ('*') with the blob link.
            var waiting = 0
            for (var slot in rev.slots) (function (slot) {
              waiting++
              var slotInfo = rev.slots[slot]
              var content = slotInfo['*']
              if (!content) {
                console.trace(slotInfo)
                return cb(new Error('Missing content'))
              }
              var sha1 = crypto.createHash('sha1').update(content).digest('hex')
              if (sha1 !== slotInfo.sha1) {
                console.trace(slotInfo, sha1)
                return cb(new Error('Mismatched content sha1'))
              }
              pull(
                pull.once(content),
                sbot.blobs.add(function (err, hash) {
                  if (err) return cb(err)
                  slotInfo.link = hash
                  delete slotInfo['*']
                  if (!--waiting) cb(null, rev)
                })
              )
            }(slot))
          })
        )

        // Revisions that did not fit in the previous draft, carried over
        // into the next one.
        var queuedRevisions = []
        var ended
        // Emit `content` as a draft with a deterministic draft id (used to
        // chain drafts via `parents`), or end the stream if it is empty.
        function cbDraft(content, cb) {
          if (!content.revisions.length) {
            console.log('No revisions for', pageInfo.title)
            return cb(true)
          }
          console.log('Prepared a message',
            'with', content.revisions.length, 'revisions',
            'for', pageInfo.title)
          prevId = '%' + crypto.createHash('sha256').update(JSON.stringify(content)).digest('base64') + '.draft6'
          cb(null, {
            draftId: prevId,
            content: content
          })
        }

        // Reader that groups revisions into draft messages, keeping each
        // serialized message under the 8192-byte ssb message limit.
        return function (abort, cb) {
          if (abort) return revisions(abort, cb)
          if (ended) return cb(true)
          var content = {
            type: 'wikimedia/revisions',
            site: pageInfo.site,
            title: pageInfo.title,
            pageId: pageInfo.hash,
            parents: prevId ? [prevId] : undefined,
            revisions: queuedRevisions.splice(0)
          }
          revisions(null, function next(end, revision) {
            // note: intentional assignment — record end-of-stream
            if (ended = end) return cbDraft(content, cb)
            content.revisions.push(revision)
            if (estimateMessageSize(content) > 8192) {
              // Too big: put the last revision back for the next draft.
              queuedRevisions.push(content.revisions.pop())
              // console.log('filled msg for ', pageInfo.title, ' with ', content.revisions.length, 'revisions')
              return cbDraft(content, cb)
            }
            revisions(null, next)
          })
        }
      }),
      pull.flatten(),

      pull.collect(function (err, drafts) {
        if (err) throw err
        if (dry) {
          // Dry run (-n): print the drafts instead of publishing.
          console.log(JSON.stringify(drafts, 0, 2))
          return sbot.close()
        }
        if (!drafts.length) {
          console.log('No messages to publish.')
          return sbot.close()
        }
        // -y skips the interactive confirmation prompt.
        if (yes) return confirmed(true)
        var rl = readline.createInterface({
          input: process.stdin,
          output: process.stdout
        })
        rl.question('Publish ' + drafts.length + ' messages? [Y/n] ', function (answer) {
          rl.close()
          confirmed(!/^n/i.test(answer))
        })
        function confirmed(yes) {
          if (!yes) return sbot.close()
          publishDrafts(sbot, drafts, function (err, msgs) {
            if (err) throw err
            console.log('Published:\n' + msgs.map(function (msg) {
              return msg.key
            }).join('\n'))
            sbot.close()
          })
        }
      })
    )
  }
})
505 |
Built with git-ssb-web