git ssb

0+

cel / ssb-wikimedia



Tree: 3e16d9e26cb2f5204c569b42349451a6900adeda

Files: 3e16d9e26cb2f5204c569b42349451a6900adeda / bin.js

14816 bytesRaw
1#!/usr/bin/env node
2
3var fs = require('fs')
4var path = require('path')
5var URL = require('url')
6var http = require('http')
7var https = require('https')
8var crypto = require('crypto')
9var readline = require('readline')
10var os = require('os')
11
12var ssbClient = require('ssb-client')
13var pull = require('pull-stream')
14
15var pkg = require('./package')
16
// Pieces of the User-Agent header sent with every API request (see getJson).
// The contact and bot values start unset and are filled in from the
// `wikimedia` section of the sbot config once the client has connected.
var userAgentBot = false
var userAgentContact
// "<package name>/<package version>", taken from package.json.
var userAgentBase = pkg.name + '/' + pkg.version
20
/**
 * Estimate how large a message carrying `content` will be once serialized.
 *
 * The content is wrapped in a stand-in message whose key, author, previous
 * link, sequence, timestamp and signature are fixed placeholder values, and
 * the whole thing is serialized as JSON with a two-space indent.
 *
 * @param {*} content - message content to measure
 * @returns {number} length of the serialized placeholder message, as
 *   reported by String#length
 */
function estimateMessageSize(content) {
  var placeholderMsgId = '%0000000000000000000000000000000000000000000=.sha256'
  var placeholderValue = {
    previous: placeholderMsgId,
    author: '@0000000000000000000000000000000000000000000=.ed25519',
    sequence: 100000,
    timestamp: 1000000000000.0001,
    hash: 'sha256',
    content: content,
    signature: '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000==.sig.ed25519'
  }
  var serialized = JSON.stringify({
    key: placeholderMsgId,
    value: placeholderValue
  }, null, 2)
  return serialized.length
}
36
/**
 * Build a pull-stream through that accumulates incoming items in a queue and
 * hands the whole queue to `fn` after each item arrives.
 *
 * While `fn(queue)` returns a truthy value the upstream is read again. The
 * downstream callback is only invoked when the upstream ends or errors; when
 * `fn` returns a falsy value nothing further happens for that read.
 *
 * @param {function(Array): *} fn - called with the queue after every item
 * @returns {function} a through: takes a read function, returns a read function
 */
function mapCollect(fn) {
  var aborted
  return function (read) {
    var queue = []
    return function (abort, cb) {
      aborted = abort
      if (aborted) return read(abort, cb)

      function onData(end, data) {
        if (end) return cb(end)
        queue.push(data)
        if (fn(queue)) read(null, onData)
      }

      read(null, onData)
    }
  }
}
52
/**
 * GET a URL over HTTP or HTTPS and parse the response body as JSON.
 *
 * The request carries a User-Agent built from the package name/version plus
 * the optional contact string and " bot" marker.
 *
 * @param {string} url - URL to fetch
 * @param {function(?Error, *=)} cb - called exactly once, with an error
 *   (connection failure, non-200 status, stream error, invalid JSON) or with
 *   the parsed body
 */
function getJson(url, cb) {
  var opts = URL.parse(url)
  opts.headers = {
    'User-Agent': userAgentBase
      + (userAgentContact ? ' (' + userAgentContact + ')' : '')
      + (userAgentBot ? ' bot' : '')
  }

  // Several events can report a failure for the same request; make sure the
  // caller only ever hears about the first outcome.
  var finished = false
  function done(err, data) {
    if (finished) return
    finished = true
    cb(err, data)
  }

  var h = opts.protocol === 'https:' ? https : http
  var req = h.get(opts, function (res) {
    if (res.statusCode !== 200) {
      // Drain the body we are not going to read so the socket is released.
      res.resume()
      return done(new Error('HTTP ' + res.statusCode + ' ' + res.statusMessage))
    }
    var bufs = []
    res.on('data', function (buf) {
      bufs.push(buf)
    })
    res.on('error', done)
    res.on('end', function () {
      var data
      try {
        data = JSON.parse(Buffer.concat(bufs).toString('utf8'))
      } catch (e) {
        return done(e)
      }
      done(null, data)
    })
  })
  // Without this handler a DNS or connection failure is emitted as an
  // unhandled 'error' event and takes the whole process down.
  req.on('error', done)
}
82
/**
 * Publish draft messages one after another, in order.
 *
 * Each draft is `{draftId, content}`. Before a draft is published, every
 * string inside its content that equals the draftId of an earlier draft is
 * replaced by the key of the message that draft was published as. A draft may
 * therefore only refer to drafts that come before it in the list.
 *
 * @param {object} sbot - client exposing `publish(content, cb)`
 * @param {Array<{draftId: string, content: *}>} drafts - drafts to publish
 * @param {function(?Error, Array=)} cb - called with the published messages,
 *   or with the first error (including a ReferenceError when a draft refers
 *   to a draft that has not been published yet)
 */
function publishDrafts(sbot, drafts, cb) {
  // draftId -> position in `drafts`. No prototype, so arbitrary strings found
  // in message content cannot collide with inherited properties.
  var draftIdIndex = Object.create(null)
  drafts.forEach(function (draft, i) {
    draftIdIndex[draft.draftId] = i
  })
  // Keys of the messages published so far; ids[i] belongs to drafts[i]
  // because drafts are published strictly in order.
  var ids = []

  // Deep-copy `obj`, swapping draft ids for published message keys.
  function replaceDraftIds(obj) {
    if (typeof obj === 'string') {
      var i = draftIdIndex[obj]
      if (typeof i === 'number') {
        var id = ids[i]
        if (!id) throw new ReferenceError('draft references unknown message')
        return id
      }
    } else if (Array.isArray(obj)) {
      return obj.map(replaceDraftIds)
    } else if (obj !== null && typeof obj === 'object') {
      var o = {}
      for (var k in obj) o[k] = replaceDraftIds(obj[k])
      return o
    }
    return obj
  }

  pull(
    pull.values(drafts),
    pull.asyncMap(function (draft, done) {
      var content
      try {
        content = replaceDraftIds(draft.content)
      } catch (e) {
        // Report a bad reference through the stream so the caller's callback
        // sees it, rather than throwing out of the pipeline.
        return done(e)
      }
      sbot.publish(content, function (err, msg) {
        if (err) return done(err)
        ids.push(msg.key)
        done(null, msg)
      })
    }),
    pull.collect(cb)
  )
}
121
// Command-line parsing. Arguments starting with "-" are flags:
//   -n  dry run (sets `dry`)
//   -y  skip the confirmation prompt (sets `yes`)
//   -h  print usage.txt and exit
// Every other argument is collected as a page URL.
var args = process.argv.slice(2)
var yes = false
var dry = false
var help = false
var urls = []
args.forEach(function (arg) {
  if (arg[0] !== '-') {
    urls.push(arg)
    return
  }
  switch (arg) {
    case '-n':
      dry = true
      break
    case '-y':
      yes = true
      break
    case '-h':
      help = true
      break
    default:
      // Throw a real Error (not a bare string) so the failure carries a type
      // and a stack trace.
      throw new Error('Unknown argument: ' + arg)
  }
})

if (help) {
  process.stdout.write(fs.readFileSync(path.join(__dirname, 'usage.txt')))
  process.exit(0)
}
140
// Main program: connect to the local sbot, work out which wiki pages to sync,
// normalize their titles, fetch the revisions newer than those already
// published, pack them into draft messages of bounded size, and publish the
// drafts once confirmed.
ssbClient(function (err, sbot, config) {
  if (err) throw err
  var conf = config.wikimedia || {}
  userAgentContact = conf.contact
  userAgentBot = conf.bot

  if (urls.length === 0) {
    // No pages on the command line: read them from the pages file, one URL
    // per line. Blank lines and lines starting with "#" are skipped.
    var pagesFile = path.join(config.path, 'wikimedia-pages.txt')
    var pagesData = fs.readFileSync(pagesFile, 'utf8')
    urls = pagesData.split('\n').map(function (line) {
      return line.trim()
    }).filter(function (line) {
      return line !== '' && line[0] !== '#'
    })
    if (!urls.length) {
      console.log('No pages to sync.')
      return sbot.close()
    }
  }

  var pagesInfo = urls.map(function (page) {
    var m = /^(.*?)\/wiki\/(.*)$/.exec(page)
    if (!m) throw new Error('Unable to parse page URL ' + page)
    return {
      site: m[1] + '/',
      api: m[1] + '/w/api.php',
      title: m[2]
    }
  })

  // Group pages by API endpoint so each site's titles can be normalized with
  // a single request.
  var pagesInfoByApi = Object.create(null)
  pagesInfo.forEach(function (pageInfo) {
    var infos = pagesInfoByApi[pageInfo.api] || (pagesInfoByApi[pageInfo.api] = [])
    infos.push(pageInfo)
  })

  // Cache of user blob ids, keyed by the exact text that is hashed
  // (site + "\tUser:" + user name), so equal user names on different sites do
  // not share an id.
  var userHashes = Object.create(null)

  console.log('Normalizing titles...')
  var apis = Object.keys(pagesInfoByApi)
  var waiting = apis.length
  apis.forEach(function (api) {
    var pagesInfoForApi = pagesInfoByApi[api]
    var pagesInfoForApiByTitle = Object.create(null)
    var titles = pagesInfoForApi.map(function (info) {
      pagesInfoForApiByTitle[info.title] = info
      return info.title
    })
    var url = api + '?format=json&action=query' +
      '&titles=' + encodeURIComponent('\x1f' + titles.join('\x1f')) +
      '&' // trailing & needed for some reason
    getJson(url, function (err, data) {
      if (err) throw err
      if (data.warnings) console.trace('Warnings:', data.warnings)
      var normalized = data.query && data.query.normalized
      if (normalized) normalized.forEach(function (norm) {
        var info = pagesInfoForApiByTitle[norm.from]
        if (!info) {
          console.error(JSON.stringify({titles: titles, response: data}, null, 2))
          throw new Error('Unexpected title in server response')
        }
        info.title = norm.to
      })
      // Start fetching once every site has answered.
      if (!--waiting) getRevisions()
    })
  })

  // Store `text` as a blob and hand its id to cb.
  function addBlob(text, cb) {
    pull(
      pull.once(text),
      sbot.blobs.add(cb)
    )
  }

  // Attach the blob id of "site<TAB>title" to the page, for linking.
  function addPageHash(pageInfo, cb) {
    addBlob(pageInfo.site + '\t' + pageInfo.title, function (err, hash) {
      if (err) return cb(err)
      pageInfo.hash = hash
      cb(null, pageInfo)
    })
  }

  // Look through the messages already linking to this page and record the
  // newest revision timestamp seen and the key of the message containing it.
  function findLatestMessage(pageInfo, cb) {
    var maxRevTs = ''
    var maxRevMsgId
    pull(
      sbot.links({
        dest: pageInfo.hash,
        rel: 'pageId',
        values: true,
        meta: false
      }),
      pull.filter(function (msg) {
        var c = msg && msg.value && msg.value.content
        return c
          && c.type === 'wikimedia/revisions'
          && c.site === pageInfo.site
          && c.title === pageInfo.title
      }),
      pull.drain(function (msg) {
        var revs = msg.value.content.revisions
        if (!Array.isArray(revs)) return
        revs.forEach(function (rev) {
          if (rev && rev.timestamp > maxRevTs) {
            maxRevTs = rev.timestamp
            maxRevMsgId = msg.key
          }
        })
      }, function (err) {
        if (err) return cb(err)
        pageInfo.latestMsgId = maxRevMsgId
        pageInfo.latestRevTs = maxRevTs
        cb(null, pageInfo)
      })
    )
  }

  // Remove one specific, expected warning text from a warnings object, and
  // the section holding it if that leaves the section empty.
  function dropKnownWarning(warnings, section, text) {
    var w = warnings[section]
    if (!w || w['*'] !== text) return
    delete w['*']
    if (Object.keys(w).length === 0) delete warnings[section]
  }

  // Print API warnings, except the ones older servers emit for the newer
  // request parameters this program always sends.
  function reportWarnings(warnings) {
    if (!warnings) return
    dropKnownWarning(warnings, 'main', 'Unrecognized parameter: rvslots.')
    dropKnownWarning(warnings, 'revisions',
      'Unrecognized values for parameter "rvprop": slotsha1, slotsize, roles.')
    if (Object.keys(warnings).length > 0) {
      console.trace('Warnings:', warnings)
    }
  }

  // Bring a revision from the old (slot-less) API response shape into the
  // slot-based shape, and drop the top-level size/sha1 duplicates.
  function normalizeSlots(rev) {
    if (!rev.slots) {
      rev.slots = {
        main: {
          size: rev.size,
          sha1: rev.sha1,
          contentmodel: rev.contentmodel,
          contentformat: rev.contentformat,
          '*': rev['*']
        }
      }
      delete rev.contentmodel
      delete rev.contentformat
      delete rev['*']
    }
    // duplicate values supplied in new API in slotsize and slotsha1
    delete rev.sha1
    delete rev.size
  }

  // Attach the blob id of "site<TAB>User:name" to the revision, for linking.
  function addUserId(pageInfo, rev, cb) {
    var userKey = pageInfo.site + '\tUser:' + rev.user
    var known = userHashes[userKey]
    if (known) {
      rev.userId = known
      return cb(null, rev)
    }
    addBlob(userKey, function (err, hash) {
      if (err) return cb(err)
      rev.userId = userHashes[userKey] = hash
      cb(null, rev)
    })
  }

  // Verify every slot's content against its sha1, store the content as a
  // blob, and replace the inline content with a link to that blob.
  // cb is called exactly once: on the first failure, or after all slots.
  function storeSlotContents(rev, cb) {
    var slots = Object.keys(rev.slots)
    var remaining = slots.length
    var failed = false

    function fail(err) {
      if (failed) return
      failed = true
      cb(err)
    }

    if (!remaining) return cb(null, rev)

    slots.forEach(function (slot) {
      if (failed) return
      var slotInfo = rev.slots[slot]
      var content = slotInfo['*']
      // An empty string is legitimate content (an empty page); only a missing
      // value is an error.
      if (typeof content !== 'string') {
        console.trace(slotInfo)
        return fail(new Error('Missing content'))
      }
      var sha1 = crypto.createHash('sha1').update(content).digest('hex')
      if (sha1 !== slotInfo.sha1) {
        console.trace(slotInfo, sha1)
        return fail(new Error('Mismatched content sha1'))
      }
      addBlob(content, function (err, hash) {
        if (err) return fail(err)
        slotInfo.link = hash
        delete slotInfo['*']
        if (!--remaining && !failed) cb(null, rev)
      })
    })
  }

  // Source of new revisions for one page, oldest first, fetched from the API
  // in batches of 50 starting at the newest timestamp already published.
  // NOTE(review): the revision at exactly `rvstart` may be returned again if
  // the API treats rvstart as inclusive - confirm against the API docs.
  function revisionSource(pageInfo) {
    var rvcontinue, rvdone, aborted
    var rvstart = pageInfo.latestRevTs

    function fetchBatch(abort, cb) {
      if (aborted = abort) return cb(abort)
      if (rvdone) return cb(true)
      console.log('Getting revisions for', pageInfo.title + '...',
        rvstart || '', rvcontinue || '')
      // Use this page's own API endpoint; pages may live on different sites.
      var url = pageInfo.api + '?format=json&action=query&prop=revisions&rvslots=*'
        + '&titles=' + encodeURIComponent(pageInfo.title)
        + '&rvprop=ids|timestamp|comment|user|sha1|size|slotsha1|slotsize|content|roles|flags|tags'
        + '&rvdir=newer'
        + (rvcontinue ? '&rvcontinue=' + encodeURIComponent(rvcontinue) : '')
        + (rvstart ? '&rvstart=' + encodeURIComponent(rvstart) : '')
        + '&rvlimit=50'
      getJson(url, function (err, data) {
        if (aborted) return err && console.trace(err)
        if (err) return cb(err)

        reportWarnings(data.warnings)

        rvcontinue = data.continue && data.continue.rvcontinue
        if (!rvcontinue) rvdone = true

        var pages = data.query && data.query.pages
        var page = null
        for (var pageid in pages) {
          if (pages[pageid].title === pageInfo.title) {
            page = pages[pageid]
            break
          }
        }
        if (!page) {
          console.trace(pages, pageInfo)
          return cb(new Error('Unable to find page'))
        }
        if (!page.revisions) {
          console.trace(page, pageInfo)
          return cb(new Error('Unable to get revisions'))
        }
        console.log('Got ' + page.revisions.length + ' revisions')
        cb(null, page.revisions)
      })
    }

    return pull(
      fetchBatch,
      pull.flatten(),
      pull.through(normalizeSlots),
      pull.asyncMap(function (rev, cb) {
        addUserId(pageInfo, rev, cb)
      }),
      pull.asyncMap(storeSlotContents)
    )
  }

  // Source of draft messages for one page. Revisions are packed into a draft
  // until the estimated message size would exceed 8192; the revision that did
  // not fit starts the next draft. Each draft names the previous one (or the
  // latest already-published message) as its parent.
  function draftSource(pageInfo) {
    var revisions = revisionSource(pageInfo)
    var prevId = pageInfo.latestMsgId
    var queuedRevisions = []
    var ended

    function cbDraft(content, cb) {
      if (!content.revisions.length) {
        console.log('No revisions for', pageInfo.title)
        return cb(true)
      }
      console.log('Prepared a message',
        'with', content.revisions.length, 'revisions',
        'for', pageInfo.title)
      // Temporary id, replaced by the real message key at publish time.
      prevId = '%' + crypto.createHash('sha256').update(JSON.stringify(content)).digest('base64') + '.draft6'
      cb(null, {
        draftId: prevId,
        content: content
      })
    }

    return function (abort, cb) {
      if (abort) return revisions(abort, cb)
      if (ended) return cb(true)
      var content = {
        type: 'wikimedia/revisions',
        site: pageInfo.site,
        title: pageInfo.title,
        pageId: pageInfo.hash,
        parents: prevId ? [prevId] : undefined,
        revisions: queuedRevisions.splice(0)
      }
      revisions(null, function onRevision(end, revision) {
        if (end) {
          ended = end
          // Best effort: a failure ends this page's stream but keeps what was
          // gathered so far. Report it instead of dropping it silently.
          if (end !== true) {
            console.error('Error getting revisions for ' + pageInfo.title + ':', end)
          }
          return cbDraft(content, cb)
        }
        content.revisions.push(revision)
        if (estimateMessageSize(content) > 8192) {
          queuedRevisions.push(content.revisions.pop())
          return cbDraft(content, cb)
        }
        revisions(null, onRevision)
      })
    }
  }

  // Show the drafts (dry run), or publish them after confirmation.
  function onDrafts(err, drafts) {
    if (err) throw err
    if (dry) {
      console.log(JSON.stringify(drafts, null, 2))
      return sbot.close()
    }
    if (!drafts.length) {
      console.log('No new revisions to publish.')
      return sbot.close()
    }
    if (yes) return confirmed(true)
    var rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout
    })
    rl.question('Publish ' + drafts.length + ' messages? [Y/n] ', function (answer) {
      rl.close()
      confirmed(!/^n/i.test(answer))
    })

    function confirmed(ok) {
      if (!ok) return sbot.close()
      publishDrafts(sbot, drafts, function (err, msgs) {
        if (err) throw err
        console.log('Published:\n' + msgs.map(function (msg) {
          return msg.key
        }).join('\n'))
        sbot.close()
      })
    }
  }

  function getRevisions() {
    console.log('Getting revisions...')
    pull(
      pull.values(pagesInfo),
      pull.asyncMap(addPageHash),
      pull.asyncMap(findLatestMessage),
      pull.map(draftSource),
      pull.flatten(),
      pull.collect(onDrafts)
    )
  }
})
454

Built with git-ssb-web