git ssb

0+

cel / ggscrape



Commit 83739963420642ad8021b66ab2f68cedaa6a99e6

Get message downloads working

Charles Lehner committed on 9/12/2014, 6:16:32 PM
Parent: 1f4de2eeab8c60b37f065c88bd87aa3ba6c51e0d

Files changed

ggscrape changed
ggscrape View
@@ -7,9 +7,9 @@
77 # Copyright (c) 2014 Charles Lehner
88 # Released under the terms of the MIT License.
99
1010 VERSION=0.0.1
11-BASE_URL='https://groups.google.com/forum/?_escaped_fragment_='
11 +BASE_URL='https://groups.google.com/forum/'
1212
1313 EX_USAGE=64
1414 topic_range=100
1515
@@ -28,24 +28,28 @@
2828 fi
2929 }
3030
3131 req() {
32- debug_print query: "$1"
32 + debug_print req: "$1"
3333 curl -sN -b "$cookie_str" "$BASE_URL$@"
3434 }
3535
36 +req_fragment() {
37 + req "?_escaped_fragment_=$@"
38 +}
39 +
3640 test_permission() {
3741 debug_print testing permission
38- req "forum/${group_id}%5B1-1-false%5D" -I | grep -q '200 OK'
42 + req_fragment "forum/${group_id}%5B1-1-false%5D" -I | grep -q '200 OK'
3943 }
4044
4145 get_topics_single() {
4246 local start=$1
4347 local end=$2
4448
4549 debug_print "get topics $group_id [$start-$end]"
4650
47- req "forum/${group_id}%5B${start}-${end}-false%5D" | sed -n \
51 + req_fragment "forum/${group_id}%5B${start}-${end}-false%5D" | sed -n \
4852 "s/^<i>Showing [^<]* of 0 topics<\/i>$//p;
4953 s/<tr>/\0\n/; /lastPostDate/ {
5054 s/.*lastPostDate\">\([^<]*\).*$/\1/m; P; D;
5155 };
@@ -64,9 +68,9 @@
6468 local start=$(($1))
6569 local end=$(($2))
6670 local temp_end
6771
68- if [[ -z "$start" ]]; then
72 + if ((start==0)); then
6973 start=1
7074 fi
7175
7276 debug_print "get all topics $group_id [$start-$end]"
@@ -79,9 +83,9 @@
7983
8084 # get message ids in a topic
8185 get_messages() {
8286 local topic_id="$1"
83- req "topic/${group_id}/${topic_id}" | sed -n\
87 + req_fragment "topic/${group_id}/${topic_id}" | sed -n\
8488 's/.*<td class="subject"><a href="[^"]*'$topic_id'\/\([^"]*\)".*/\1/p'
8589 }
8690
8791 download_message() {
@@ -89,11 +93,19 @@
8993 local msg_id="$2"
9094 debug_print download topic $topic_id message $msg_id
9195 path="${dest_dir}/${group_id}${topic_id}${msg_id}.eml"
9296 if [[ -s "$path" ]]; then
93- echo "file for message ${msg_id} already exists. skipping."
97 + echo "message ${topic_id}${msg_id} already downloaded. skipping."
9498 else
95- req "${group_id}/${topic_id}/${msg_id}" > "$path"
99 + echo "message ${topic_id}${msg_id} downloading."
100 + temp=$(mktemp)
101 + if req "message/raw?msg=${group_id}/${topic_id}/${msg_id}" -o "$temp"
102 + then
103 + mv "$temp" "$path"
104 + else
105 + echo "message ${topic_id}${msg_id} failed to download." >&2
106 + rm "$temp"
107 + fi
96108 fi
97109 }
98110
99111 download_messages() {

Built with git-ssb-web