git ssb

0+

cel / ssb-wikimedia



Tree: fc9566b3a2c93fc5f470b663a3be05ff99b640f6

Files: fc9566b3a2c93fc5f470b663a3be05ff99b640f6 / bin.js

14862 bytesRaw
1#!/usr/bin/env node
2
3var fs = require('fs')
4var path = require('path')
5var URL = require('url')
6var http = require('http')
7var https = require('https')
8var crypto = require('crypto')
9var readline = require('readline')
10var os = require('os')
11
12var ssbClient = require('ssb-client')
13var pull = require('pull-stream')
14
15var pkg = require('./package')
16
17var userAgentBase = pkg.name + '/' + pkg.version
18var userAgentContact
19var userAgentBot = false
20
// Approximate the serialized size, in characters, that `content` would
// occupy once wrapped in a fully signed SSB message. Placeholder envelope
// fields (key, previous, author, sequence, timestamp, signature) use
// worst-case-length values so the estimate errs on the large side.
function estimateMessageSize(content) {
  var placeholderEnvelope = {
    key: '%0000000000000000000000000000000000000000000=.sha256',
    value: {
      previous: '%0000000000000000000000000000000000000000000=.sha256',
      author: '@0000000000000000000000000000000000000000000=.ed25519',
      sequence: 100000,
      timestamp: 1000000000000.0001,
      hash: 'sha256',
      content: content,
      signature: '00000000000000000000000000000000000000000000000000000000000000000000000000000000000000==.sig.ed25519'
    }
  }
  var serialized = JSON.stringify(placeholderEnvelope, null, 2)
  return serialized.length
}
36
// Pull-stream through that accumulates every value it reads into a growing
// queue and hands that queue to `fn` after each value. While `fn` returns
// truthy it keeps reading from upstream; stream end and aborts are passed
// straight through.
// NOTE(review): when `fn` returns falsy, neither the callback nor another
// read is invoked, so the stream simply stalls — confirm this is intended
// (this helper appears to be unused in the visible part of the file).
function mapCollect(fn) {
  var aborted
  return function (read) {
    var collected = []
    return function (abort, cb) {
      aborted = abort
      if (aborted) return read(abort, cb)
      read(null, function onItem(end, data) {
        if (end) return cb(end)
        collected.push(data)
        var keepGoing = fn(collected)
        if (keepGoing) read(null, onItem)
      })
    }
  }
}
52
// Fetch `url` over HTTP(S) and parse the response body as JSON.
// Sends a User-Agent assembled from the package name/version plus the
// optional contact string and bot flag configured at the top of this file.
// Calls cb(err) on request/response errors, non-200 statuses, or invalid
// JSON; cb(null, data) with the parsed object otherwise.
function getJson(url, cb) {
  var opts = URL.parse(url)
  opts.headers = {
    'User-Agent': userAgentBase
      + (userAgentContact ? ' (' + userAgentContact + ')' : '')
      + (userAgentBot ? ' bot' : '')
  }
  var h = opts.protocol === 'https:' ? https : http
  var req = h.get(opts, function (res) {
    if (res.statusCode !== 200) {
      console.error(res.headers, url)
      // Drain the unused body so the socket is released back to the agent.
      res.resume()
      return cb(new Error('HTTP ' + res.statusCode + ' ' + res.statusMessage))
    }
    var bufs = []
    res.on('data', function (buf) {
      bufs.push(buf)
    })
    res.on('end', function () {
      res.removeListener('error', cb)
      var buf = Buffer.concat(bufs)
      bufs = null
      var data
      try {
        data = JSON.parse(buf.toString('utf8'))
      } catch (e) {
        return cb(e)
      }
      cb(null, data)
    })
    res.on('error', cb)
  })
  // Fix: without this, a DNS or connection failure emitted an unhandled
  // 'error' event on the request and crashed the process.
  req.on('error', cb)
}
85
// Publish each draft message via sbot.publish, in order.
// Drafts may reference earlier drafts by draftId anywhere inside their
// content; those placeholder ids are rewritten to the real message keys
// assigned as publishing proceeds. Calls cb(err) on the first failure, or
// cb(null, msgs) with the array of published messages.
function publishDrafts(sbot, drafts, cb) {
  var draftIdIndex = {}
  drafts.forEach(function (draft, i) {
    draftIdIndex[draft.draftId] = i
  })
  // ids[i] holds the real message key of drafts[i], filled in as we publish.
  var ids = []

  // Deep-copy obj, replacing any string that is a known draftId with the
  // corresponding published message id. Throws if a draft references a
  // draft that has not been published yet (a forward reference).
  function replaceDraftIds(obj) {
    if (typeof obj === 'string') {
      var i = draftIdIndex[obj]
      if (typeof i === 'number') {
        var id = ids[i]
        // Fix: error message typo ("referernces").
        if (!id) throw new ReferenceError('draft references unknown message')
        return id
      }
    } else if (Array.isArray(obj)) {
      return obj.map(replaceDraftIds)
    } else if (obj !== null && typeof obj === 'object') {
      var o = {}
      for (var k in obj) o[k] = replaceDraftIds(obj[k])
      return o
    }
    return obj
  }

  pull(
    pull.values(drafts),
    pull.asyncMap(function (draft, cb) {
      var content = replaceDraftIds(draft.content)
      sbot.publish(content, function (err, msg) {
        if (err) return cb(err)
        ids.push(msg.key)
        cb(null, msg)
      })
    }),
    pull.collect(cb)
  )
}
124
// Command-line handling: flags may appear anywhere; every non-flag
// argument is treated as a page URL to sync.
var args = process.argv.slice(2)
var yes = false   // -y: publish without asking for confirmation
var dry = false   // -n: dry run — print drafts instead of publishing
var help = false  // -h: print usage and exit
var urls = []
for (var argIdx = 0; argIdx < args.length; argIdx++) {
  var arg = args[argIdx]
  if (arg[0] !== '-') {
    urls.push(arg)
  } else if (arg === '-n') {
    dry = true
  } else if (arg === '-y') {
    yes = true
  } else if (arg === '-h') {
    help = true
  } else {
    throw 'Unknown argument: ' + arg
  }
}

if (help) {
  process.stdout.write(fs.readFileSync(path.join(__dirname, 'usage.txt')))
  process.exit(0)
}
143
// Connect to the local ssb server and sync the requested wiki pages:
// normalize titles, find what was already published, fetch new revisions
// from the MediaWiki API, pack them into <= 8 KiB draft messages, then
// (after confirmation) publish them.
ssbClient(function (err, sbot, config) {
  if (err) throw err
  var conf = config.wikimedia || {}
  userAgentContact = conf.contact
  userAgentBot = conf.bot

  // With no URLs on the command line, fall back to the list in
  // <ssb-path>/wikimedia-pages.txt (one page URL per line).
  if (urls.length === 0) {
    var pagesFile = path.join(config.path, 'wikimedia-pages.txt')
    var pagesData = fs.readFileSync(pagesFile, 'utf8')
    // NOTE(review): /[^#]/ only requires some non-'#' character, so a line
    // like "# comment" still passes; only empty and all-'#' lines are
    // dropped. Confirm whether comment lines should be excluded entirely.
    urls = pagesData.split('\n').filter(RegExp.prototype.test.bind(/[^#]/))
    if (!urls.length) {
      console.log('No pages to sync.')
      return sbot.close()
    }
  }

  // Split each page URL into site root, API endpoint and page title.
  var pagesInfo = urls.map(function (page) {
    var m = /^(https?:\/\/.*?)(\/wiki)?\/(.*)$/.exec(page)
    if (!m) throw 'Unable to parse page URL ' + page
    return {
      site: m[1] + '/',
      api: m[1] + (m[2] ? '/w' : '/wiki') + '/api.php',
      title: m[3]
    }
  })
  // Group pages by API endpoint so titles can be normalized with one
  // request per wiki.
  var pagesInfoByApi = {}
  pagesInfo.forEach(function (pageInfo) {
    var infos = pagesInfoByApi[pageInfo.api] || (pagesInfoByApi[pageInfo.api] = [])
    infos.push(pageInfo)
  })
  console.log('Normalizing titles...')
  var waiting = 0
  for (var api in pagesInfoByApi) (function (api) {
    var pagesInfoForApi = pagesInfoByApi[api]
    var pagesInfoForApiByTitle = {}
    var titles = pagesInfoForApi.map(function (info) {
      pagesInfoForApiByTitle[info.title] = info
      return info.title
    })
    // \x1f (unit separator) is MediaWiki's alternative multi-value
    // separator; it allows titles containing '|'.
    var url = api + '?format=json&action=query' +
      '&titles=' + encodeURIComponent('\x1f' + titles.join('\x1f')) +
      '&' // trailing & needed for some reason
    waiting++
    getJson(url, function (err, data) {
      if (err) throw err
      if (data.warnings) console.trace('Warnings:', data.warnings)
      if (data.query.normalized) data.query.normalized.forEach(function (norm) {
        var info = pagesInfoForApiByTitle[norm.from]
        if (!info) {
          console.error(JSON.stringify({titles: titles, response: data}, 0, 2))
          throw new Error('Unexpected title in server response')
        }
        info.title = norm.to
      })
      if (!--waiting) next()
    })
  }(api))

  function next() {
    console.log('Getting revisions...')
    // Cache of blob ids for user pages, keyed by username.
    var userHashes = {}
    pull(
      pull.values(pagesInfo),
      pull.asyncMap(function (pageInfo, cb) {
        // Calculate blob id for page URL + title, for linking
        pull(
          pull.once(pageInfo.site + '\t' + pageInfo.title),
          sbot.blobs.add(function (err, hash) {
            if (err) return cb(err) // fix: error was silently ignored
            pageInfo.hash = hash
            cb(null, pageInfo)
          })
        )
      }),
      pull.asyncMap(function (pageInfo, cb) {
        // Get previous messages for this page.
        // Simple solution: find the revision with latest timestamp.
        var maxRevTs = ''
        var maxRevMsgId
        pull(
          sbot.links({
            dest: pageInfo.hash,
            rel: 'pageId',
            values: true,
            meta: false
          }),
          pull.filter(function (msg) {
            var c = msg && msg.value && msg.value.content
            return c
              && c.type === 'wikimedia/revisions'
              && c.site === pageInfo.site
              && c.title === pageInfo.title
          }),
          pull.drain(function (msg) {
            var c = msg && msg.value && msg.value.content
            var revs = Array.isArray(c.revisions) && c.revisions
            if (revs) revs.forEach(function (rev) {
              if (rev && rev.timestamp > maxRevTs) {
                maxRevTs = rev.timestamp
                // Fix: was `==` (a no-op comparison), so the latest message
                // id was never recorded and threading always restarted.
                maxRevMsgId = msg.key
              }
            })
          }, function (err) {
            if (err) return cb(err)
            pageInfo.latestMsgId = maxRevMsgId
            pageInfo.latestRevTs = maxRevTs
            cb(null, pageInfo)
          })
        )
      }),
      pull.map(function (pageInfo) {
        // Get new revisions.
        var rvcontinue, rvdone
        var rvstart = pageInfo.latestRevTs
        var prevId = pageInfo.latestMsgId
        var aborted
        // Source of individual revisions, paging through the MediaWiki
        // revisions API starting after the newest already-published one.
        var revisions = pull(
          function (abort, cb) {
            if (aborted = abort) return cb(abort)
            if (rvdone) return cb(true)

            console.log('Getting revisions for', pageInfo.title + '...',
              rvstart || '', rvcontinue || '')
            // Fix: use this page's own API endpoint. The old code read the
            // leaked `api` for-in loop variable, which always pointed at
            // the last wiki when pages spanned multiple APIs.
            var url = pageInfo.api + '?format=json&action=query&prop=revisions&rvslots=*'
              + '&titles=' + encodeURIComponent(pageInfo.title)
              + '&rvprop=ids|timestamp|comment|user|sha1|size|slotsha1|slotsize|content|roles|flags|tags'
              + '&rvdir=newer'
              + (rvcontinue ? '&rvcontinue=' + rvcontinue : '')
              + (rvstart ? '&rvstart=' + rvstart : '')
              + '&rvlimit=50'
            getJson(url, function (err, data) {
              if (aborted) return err && console.trace(err)
              if (err) return cb(err)

              // Suppress the expected warnings older MediaWiki versions
              // emit for the rvslots/slot-related parameters.
              var warnings = data.warnings
              if (warnings) {
                if (warnings.main) {
                  if (warnings.main['*'] === 'Unrecognized parameter: rvslots.') {
                    delete warnings.main['*']
                    if (Object.keys(warnings.main).length === 0) {
                      delete warnings.main
                    }
                  }
                }
                if (warnings.revisions) {
                  if (warnings.revisions['*'] === 'Unrecognized values for parameter "rvprop": slotsha1, slotsize, roles.') {
                    delete warnings.revisions['*']
                    if (Object.keys(warnings.revisions).length === 0) {
                      delete warnings.revisions
                    }
                  }
                }
                if (Object.keys(warnings).length > 0) {
                  console.trace('Warnings:', warnings)
                }
              }

              rvcontinue = data.continue && data.continue.rvcontinue
              if (!rvcontinue) rvdone = true
              var page
              if (data.query) for (var pageid in data.query.pages) {
                page = data.query.pages[pageid]
                if (page.title === pageInfo.title) break
                else page = null
              }
              if (!page) {
                console.trace(data.query.pages, pageInfo)
                return cb(new Error('Unable to find page'))
              }
              var revs = page.revisions || []
              console.log('Got ' + revs.length + ' revisions')
              cb(null, revs)
            })
          },
          pull.flatten(),

          pull.through(function (rev) {
            if (!rev.slots) {
              // old API does not use slots.
              // Transform result to be forward-compatible.
              rev.slots = {
                main: {
                  size: rev.size,
                  sha1: rev.sha1,
                  contentmodel: rev.contentmodel,
                  contentformat: rev.contentformat,
                  '*': rev['*']
                }
              }
              delete rev.contentmodel
              delete rev.contentformat
              delete rev['*']
            }
            // duplicate values supplied in new API in slotsize and slotsha1
            delete rev.sha1
            delete rev.size
          }),

          pull.asyncMap(function (rev, cb) {
            // Calculate blob id for user page URL + title, for linking
            var hash = userHashes[rev.user]
            if (hash) {
              rev.userId = hash
              return cb(null, rev)
            }
            pull(
              pull.once(pageInfo.site + '\tUser:' + rev.user),
              sbot.blobs.add(function (err, hash) {
                if (err) return cb(err) // fix: error was silently ignored
                rev.userId = userHashes[rev.user] = hash
                cb(null, rev)
              })
            )
          }),

          pull.asyncMap(function (rev, cb) {
            // Replace each slot's inline content with a blob link, after
            // verifying the content against the server-supplied sha1.
            var waiting = 0
            for (var slot in rev.slots) (function (slot) {
              waiting++
              var slotInfo = rev.slots[slot]
              var content = slotInfo['*']
              if (!content) {
                console.trace(slotInfo)
                return cb(new Error('Missing content'))
              }
              var sha1 = crypto.createHash('sha1').update(content).digest('hex')
              if (sha1 !== slotInfo.sha1) {
                console.trace(slotInfo, sha1)
                return cb(new Error('Mismatched content sha1'))
              }
              pull(
                pull.once(content),
                sbot.blobs.add(function (err, hash) {
                  if (err) return cb(err)
                  slotInfo.link = hash
                  delete slotInfo['*']
                  if (!--waiting) cb(null, rev)
                })
              )
            }(slot))
          })
        )

        var queuedRevisions = []
        var ended
        // Emit a draft message for `content`, or end this page's stream if
        // the batch came out empty. Each draft id doubles as the `parents`
        // link of the next draft, preserving the message thread.
        function cbDraft(content, cb) {
          if (!content.revisions.length) {
            console.log('No revisions for', pageInfo.title)
            return cb(true)
          }
          console.log('Prepared a message',
            'with', content.revisions.length, 'revisions',
            'for', pageInfo.title)
          prevId = '%' + crypto.createHash('sha256').update(JSON.stringify(content)).digest('base64') + '.draft6'
          cb(null, {
            draftId: prevId,
            content: content
          })
        }

        // Source of draft messages: pack revisions into messages of at
        // most ~8 KiB each, spilling the overflowing revision into the
        // next draft via queuedRevisions.
        return function (abort, cb) {
          if (abort) return revisions(abort, cb)
          if (ended) return cb(true)
          var content = {
            type: 'wikimedia/revisions',
            site: pageInfo.site,
            title: pageInfo.title,
            pageId: pageInfo.hash,
            parents: prevId ? [prevId] : undefined,
            revisions: queuedRevisions.splice(0)
          }
          revisions(null, function next(end, revision) {
            if (ended = end) return cbDraft(content, cb)
            content.revisions.push(revision)
            if (estimateMessageSize(content) > 8192) {
              queuedRevisions.push(content.revisions.pop())
              return cbDraft(content, cb)
            }
            revisions(null, next)
          })
        }
      }),
      pull.flatten(),

      pull.collect(function (err, drafts) {
        if (err) throw err
        if (dry) {
          console.log(JSON.stringify(drafts, 0, 2))
          return sbot.close()
        }
        if (!drafts.length) {
          console.log('No messages to publish.')
          return sbot.close()
        }
        if (yes) return confirmed(true)
        var rl = readline.createInterface({
          input: process.stdin,
          output: process.stdout
        })
        rl.question('Publish ' + drafts.length + ' messages? [Y/n] ', function (answer) {
          rl.close()
          confirmed(!/^n/i.test(answer))
        })
        function confirmed(yes) {
          if (!yes) return sbot.close()
          publishDrafts(sbot, drafts, function (err, msgs) {
            if (err) throw err
            // Fix: the closing paren was misplaced (`}.join('\n')`), which
            // called .join on the callback function itself and threw a
            // TypeError right after the messages were published.
            console.log('Published:\n' + msgs.map(function (msg) {
              return msg.key
            }).join('\n'))
            sbot.close()
          })
        }
      })
    )
  }
})
461

Built with git-ssb-web