Commit a6d1c96847d89a3d559508ed9c4443a8730a7145
v03 - who wrote these posts
mixmix committed on 10/17/2018, 11:37:10 AMParent: 3b4dcd1fd6d4f5e8c7a7dc56ba8dbcd131583dfa
Files changed
README.md | changed |
package-lock.json | changed |
package.json | changed |
v02.js | changed |
helpers/days-posts.js | added |
v03.js | added |
README.md | ||
---|---|---|
@@ -90,4 +90,63 @@ | ||
90 | 90 … | Notice in the `timestamp` field, that I've not given a time I've given a _time range_ : `$gte: 1539687600000` means _greater than or equal to the start of 2018-10-17 in Melbourne Australia_ (relevant because that's the start of the day _subjectively_ for me where I am as I write this). `$lt: 1539774000000` means _less than the start of the day following that_ |
91 | 91 … | |
92 | 92 … | The `query` property is an Array because with more advanced queries we can also get the server to map and reduce the data we've got thereby further reducing the amount of data sent over muxrpc to us. I've put a commented out example in the code you can play with. |
93 | 93 … | |
94 … | + | |
95 … | +## `v03` - who wrote these posts? | |
96 … | + | |
97 … | +We want to put actual names to the posts we're reading. To keep things simple we're going to get the last name that the author of each post gave to themselves. | |
98 … | + | |
99 … | +So our stream pipeline is gonna be like this : | |
100 … | +``` | |
101 … | + source : start streaming all the posts | |
102 … | + v | |
103 … | + through : get the asserted name | |
104 … | + v | |
105 … | + sink : collect the results and print them out | |
106 … | +``` | |
107 … | + | |
108 … | +I've pulled the source out from v02 and put that in `helpers/days-posts.js` so go read that to make sure you understand what sort of data is coming out of that source. | |
109 … | + | |
110 … | +The `through` we're using needs to be asynchronous, because to get names, we're hitting the database again. We're gonna use [**pull-paramap**](https://github.com/pull-stream/pull-paramap) because it allows us to run heaps of queries to the database in parallel which is hella fast. | |
111 … | + | |
112 … | +### The query for finding names | |
113 … | + | |
114 … | +Like most things in the scuttleverse how one names things is something which has emerged as a convention. Here's the most common shape of the content part of a naming message : | |
115 … | + | |
116 … | +```js | |
117 … | +{ | |
118 … | + type: 'about', | |
119 … | + about: Target, // the thing you're asserting something about | |
120 … | + name: String | |
121 … | +} | |
122 … | +``` | |
123 … | + | |
124 … | +And here's the full shape of our ssb-query in this case. Notice we are filtering for the about messages where the author of that message is asserting something about themselves (`value.author` and `value.content.about` are both the same). | |
125 … | +Also notice we're performing a `$map` which means the data coming back from this query is only the actual name, and not the whole about message. | |
126 … | + | |
127 … | +```js | |
128 … | +const opts = { | |
129 … | + limit: 1, | |
130 … | + reverse: true, | |
131 … | + query: [ | |
132 … | + { | |
133 … | + $filter: { | |
134 … | + value: { | |
135 … | + author: feedId, | |
136 … | + content: { | |
137 … | + type: 'about', | |
138 … | + about: feedId, | |
139 … | + name: { $is: 'string' } // there's a name string present | |
140 … | + } | |
141 … | + }, | |
142 … | + timestamp: { $gt: 0 } // a hack that forces ordering by timestamp | |
143 … | + } | |
144 … | + }, | |
145 … | + { | |
146 … | + $map: { | |
147 … | + name: ['value', 'content', 'name'] | |
148 … | + } | |
149 … | + } | |
150 … | + ] | |
151 … | +} | |
152 … | +``` |
package-lock.json | ||
---|---|---|
@@ -1620,8 +1620,23 @@ | ||
1620 | 1620 … | "version": "1.1.0", |
1621 | 1621 … | "resolved": "https://registry.npmjs.org/pull-pair/-/pull-pair-1.1.0.tgz", |
1622 | 1622 … | "integrity": "sha1-fuQnJj/fTaglOXrAoF4atLdL120=" |
1623 | 1623 … | }, |
1624 … | + "pull-paramap": { | |
1625 … | + "version": "1.2.2", | |
1626 … | + "resolved": "https://registry.npmjs.org/pull-paramap/-/pull-paramap-1.2.2.tgz", | |
1627 … | + "integrity": "sha1-UaQZPOnI1yFdla2tReK824STsjo=", | |
1628 … | + "requires": { | |
1629 … | + "looper": "^4.0.0" | |
1630 … | + }, | |
1631 … | + "dependencies": { | |
1632 … | + "looper": { | |
1633 … | + "version": "4.0.0", | |
1634 … | + "resolved": "https://registry.npmjs.org/looper/-/looper-4.0.0.tgz", | |
1635 … | + "integrity": "sha1-dwat7VmpntygbmtUu4bI7BnJUVU=" | |
1636 … | + } | |
1637 … | + } | |
1638 … | + }, | |
1624 | 1639 … | "pull-pushable": { |
1625 | 1640 … | "version": "2.2.0", |
1626 | 1641 … | "resolved": "https://registry.npmjs.org/pull-pushable/-/pull-pushable-2.2.0.tgz", |
1627 | 1642 … | "integrity": "sha1-Xy867UethpGfAbEqLpnW8b13ZYE=" |
package.json | ||
---|---|---|
@@ -16,8 +16,9 @@ | ||
16 | 16 … | "url": "https://github.com/mixmix/ssb-client-basic/issues" |
17 | 17 … | }, |
18 | 18 … | "homepage": "https://github.com/mixmix/ssb-client-basic#readme", |
19 | 19 … | "dependencies": { |
20 … | + "pull-paramap": "^1.2.2", | |
20 | 21 … | "ssb-client": "^4.6.0" |
21 | 22 … | }, |
22 | 23 … | "devDependencies": { |
23 | 24 … | "standard": "^12.0.1" |
v02.js | ||
---|---|---|
@@ -6,31 +6,31 @@ | ||
6 | 6 … | Connection((err, server) => { |
7 | 7 … | if (err) throw err |
8 | 8 … | console.log('Connection established') |
9 | 9 … | |
10 | - const rightNow = new Date() | |
10 … | + const today = new Date() | |
11 | 11 … | const opts = { |
12 | 12 … | reverse: true, |
13 | 13 … | query: [ |
14 | 14 … | { |
15 | 15 … | $filter: { |
16 | 16 … | value: { |
17 | 17 … | content: { type: 'post' }, |
18 | 18 … | timestamp: { |
19 | - $gte: Number(startOfDay(rightNow)), | |
20 | - $lt: Number(startOfDay(rightNow, +1)) | |
19 … | + $gte: Number(startOfDay(today)), | |
20 … | + $lt: Number(startOfDay(today, +1)) | |
21 | 21 … | } |
22 | 22 … | } |
23 | 23 … | } |
24 | 24 … | }, |
25 | - // { | |
26 | - // $map: { | |
27 | - // author: ['value', 'author'], | |
28 | - // timestamp: ['value', 'timestamp'], | |
29 | - // text: ['value', 'content', 'text'], | |
30 | - // root: ['value', 'content', 'root'] // the root messages of a thread, this is present if this post is a reply to another message | |
31 | - // } | |
32 | - // } | |
25 … | + { | |
26 … | + $map: { | |
27 … | + author: ['value', 'author'], | |
28 … | + timestamp: ['value', 'timestamp'], | |
29 … | + text: ['value', 'content', 'text'], | |
30 … | + root: ['value', 'content', 'root'] // the root messages of a thread, this is present if this post is a reply to another message | |
31 … | + } | |
32 … | + } | |
33 | 33 … | ] |
34 | 34 … | } |
35 | 35 … | |
36 | 36 … | pull( |
helpers/days-posts.js | ||
---|---|---|
@@ -1,0 +1,45 @@ | ||
1 … | +// Note this is just extracted from v02.js and gussied up a little to make it more usable and to make v03.js less cluttered | |
2 … | + | |
3 … | +module.exports = function (server) { | |
4 … | + if (!server) throw new Error('day-posts helper requires a server!') | |
5 … | + if (!server.query) throw new Error('day-posts helper requires a server with the ssb-query installed!') | |
6 … | + | |
7 … | + return function daysPosts (day = new Date()) { | |
8 … | + const opts = { | |
9 … | + reverse: true, | |
10 … | + query: [ | |
11 … | + { | |
12 … | + $filter: { | |
13 … | + value: { | |
14 … | + content: { type: 'post' }, | |
15 … | + timestamp: { | |
16 … | + $gte: Number(startOfDay(day)), | |
17 … | + $lt: Number(startOfDay(day, +1)) | |
18 … | + } | |
19 … | + } | |
20 … | + } | |
21 … | + }, { | |
22 … | + $map: { | |
23 … | + author: ['value', 'author'], | |
24 … | + timestamp: ['value', 'timestamp'], | |
25 … | + text: ['value', 'content', 'text'], | |
26 … | + root: ['value', 'content', 'root'] // the root messages of a thread, this is present if this post is a reply to another message | |
27 … | + } | |
28 … | + } | |
29 … | + ] | |
30 … | + } | |
31 … | + | |
32 … | + return server.query.read(opts) | |
33 … | + // returns a source stream | |
34 … | + // will not start delivering data until it's connected with a sink | |
35 … | + } | |
36 … | +} | |
37 … | + | |
38 … | +function startOfDay (time = new Date(), dayOffset = 0) { | |
39 … | + // dayOffset = 0 means this argument defaults to 0 when it's not supplied | |
40 … | + | |
41 … | + const year = time.getFullYear() | |
42 … | + const month = time.getMonth() | |
43 … | + const date = time.getDate() + dayOffset | |
44 … | + return new Date(year, month, date, 0, 0, 0) // 0 hours, 0 minutes, 0 seconds | |
45 … | +} |
v03.js | ||
---|---|---|
@@ -1,0 +1,100 @@ | ||
1 … | +const Connection = require('ssb-client') | |
2 … | +const pull = require('pull-stream') | |
3 … | +pull.paraMap = require('pull-paramap') | |
4 … | +const daysPosts = require('./helpers/days-posts') | |
5 … | + | |
6 … | +console.log('Connecting') | |
7 … | + | |
8 … | +Connection((err, server) => { | |
9 … | + if (err) throw err | |
10 … | + console.log('Connection established') | |
11 … | + | |
12 … | + const today = new Date(2018, 9, 17) | |
13 … | + | |
14 … | + console.time('get posts') | |
15 … | + pull( | |
16 … | + daysPosts(server)(today), | |
17 … | + pull.paraMap(getAuthorName, 50), // run up to 50 asynchronous maps in parallel | |
18 … | + pull.collect(onDone) | |
19 … | + ) | |
20 … | + | |
21 … | + // Note you could use pull.asyncMap, but it only does 1 async map at a time... it's 240x slower on my machine! | |
22 … | + | |
23 … | + function getAuthorName (data, cb) { | |
24 … | + // NOTE the data is coming in from the daysPosts source and has been mapped into the form { author, timestamp, text, root } | |
25 … | + | |
26 … | + // cb is a function provided to us by pull-paramap which we use to pass results out once we're done and to pass things on to the next part of the stream (the collect here) | |
27 … | + | |
28 … | + const feedId = data.author | |
29 … | + | |
30 … | + const opts = { | |
31 … | + limit: 1, | |
32 … | + reverse: true, | |
33 … | + query: [ | |
34 … | + { | |
35 … | + $filter: { | |
36 … | + value: { | |
37 … | + author: feedId, | |
38 … | + content: { | |
39 … | + type: 'about', | |
40 … | + about: feedId, | |
41 … | + name: { $is: 'string' } // there's a name string present | |
42 … | + } | |
43 … | + }, | |
44 … | + timestamp: { $gt: 0 } // a hack that forces ordering by timestamp | |
45 … | + } | |
46 … | + }, | |
47 … | + { | |
48 … | + $map: { | |
49 … | + name: ['value', 'content', 'name'] | |
50 … | + } | |
51 … | + } | |
52 … | + ] | |
53 … | + } | |
54 … | + | |
55 … | + pull( | |
56 … | + server.query.read(opts), | |
57 … | + pull.collect((err, results) => { | |
58 … | + if (err) { | |
59 … | + cb(err) | |
60 … | + return | |
61 … | + } | |
62 … | + | |
63 … | + var name | |
64 … | + if (!results || !results.length) name = feedId | |
65 … | + else name = results[0].name | |
66 … | + // console.log(name) // debug / see the names fly by as we get them! | |
67 … | + | |
68 … | + data.authorName = name | |
69 … | + // stamp the name we found onto the data object | |
70 … | + | |
71 … | + cb(null, data) | |
72 … | + }) | |
73 … | + ) | |
74 … | + } | |
75 … | + | |
76 … | + function onDone (err, msgs) { | |
77 … | + if (err) { | |
78 … | + console.error('oh noes', err) | |
79 … | + server.close() | |
80 … | + return | |
81 … | + } | |
82 … | + | |
83 … | + msgs.forEach(msg => { | |
84 … | + prettyPrint(msg) | |
85 … | + console.log('------') | |
86 … | + }) | |
87 … | + | |
88 … | + console.log(`${msgs.length} messages`) | |
89 … | + console.timeEnd('get posts') | |
90 … | + server.close() | |
91 … | + } | |
92 … | +}) | |
93 … | + | |
94 … | +// helpers | |
95 … | + | |
96 … | +function prettyPrint (obj) { | |
97 … | + console.log(JSON.stringify(obj, null, 2)) | |
98 … | + // this just prints the full object out as a string that's been nicely indented | |
99 … | + // with each level of nesting | |
100 … | +} |
Built with git-ssb-web