Commit 83739963420642ad8021b66ab2f68cedaa6a99e6
Get message downloads working
Charles Lehner committed on 9/12/2014, 6:16:32 PM
Parent: 1f4de2eeab8c60b37f065c88bd87aa3ba6c51e0d
Files changed
ggscrape | changed |
ggscrape | |||
---|---|---|---|
@@ -7,9 +7,9 @@ | |||
7 | 7 … | # Copyright (c) 2014 Charles Lehner | |
8 | 8 … | # Released under the terms of the MIT License. | |
9 | 9 … | ||
10 | 10 … | VERSION=0.0.1 | |
11 | -BASE_URL='https://groups.google.com/forum/?_escaped_fragment_=' | ||
11 … | +BASE_URL='https://groups.google.com/forum/' | ||
12 | 12 … | ||
13 | 13 … | EX_USAGE=64 | |
14 | 14 … | topic_range=100 | |
15 | 15 … | ||
@@ -28,24 +28,28 @@ | |||
28 | 28 … | fi | |
29 | 29 … | } | |
30 | 30 … | ||
31 | 31 … | req() { | |
32 | - debug_print query: "$1" | ||
32 … | + debug_print req: "$1" | ||
33 | 33 … | curl -sN -b "$cookie_str" "$BASE_URL$@" | |
34 | 34 … | } | |
35 | 35 … | ||
36 … | +req_fragment() { | ||
37 … | + req "?_escaped_fragment_=$@" | ||
38 … | +} | ||
39 … | + | ||
36 | 40 … | test_permission() { | |
37 | 41 … | debug_print testing permission | |
38 | - req "forum/${group_id}%5B1-1-false%5D" -I | grep -q '200 OK' | ||
42 … | + req_fragment "forum/${group_id}%5B1-1-false%5D" -I | grep -q '200 OK' | ||
39 | 43 … | } | |
40 | 44 … | ||
41 | 45 … | get_topics_single() { | |
42 | 46 … | local start=$1 | |
43 | 47 … | local end=$2 | |
44 | 48 … | ||
45 | 49 … | debug_print "get topics $group_id [$start-$end]" | |
46 | 50 … | ||
47 | - req "forum/${group_id}%5B${start}-${end}-false%5D" | sed -n \ | ||
51 … | + req_fragment "forum/${group_id}%5B${start}-${end}-false%5D" | sed -n \ | ||
48 | 52 … | "s/^<i>Showing [^<]* of 0 topics<\/i>$//p; | |
49 | 53 … | s/<tr>/\0\n/; /lastPostDate/ { | |
50 | 54 … | s/.*lastPostDate\">\([^<]*\).*$/\1/m; P; D; | |
51 | 55 … | }; | |
@@ -64,9 +68,9 @@ | |||
64 | 68 … | local start=$(($1)) | |
65 | 69 … | local end=$(($2)) | |
66 | 70 … | local temp_end | |
67 | 71 … | ||
68 | - if [[ -z "$start" ]]; then | ||
72 … | + if ((start==0)); then | ||
69 | 73 … | start=1 | |
70 | 74 … | fi | |
71 | 75 … | ||
72 | 76 … | debug_print "get all topics $group_id [$start-$end]" | |
@@ -79,9 +83,9 @@ | |||
79 | 83 … | ||
80 | 84 … | # get message ids in a topic | |
81 | 85 … | get_messages() { | |
82 | 86 … | local topic_id="$1" | |
83 | - req "topic/${group_id}/${topic_id}" | sed -n\ | ||
87 … | + req_fragment "topic/${group_id}/${topic_id}" | sed -n\ | ||
84 | 88 … | 's/.*<td class="subject"><a href="[^"]*'$topic_id'\/\([^"]*\)".*/\1/p' | |
85 | 89 … | } | |
86 | 90 … | ||
87 | 91 … | download_message() { | |
@@ -89,11 +93,19 @@ | |||
89 | 93 … | local msg_id="$2" | |
90 | 94 … | debug_print download topic $topic_id message $msg_id | |
91 | 95 … | path="${dest_dir}/${group_id}${topic_id}${msg_id}.eml" | |
92 | 96 … | if [[ -s "$path" ]]; then | |
93 | - echo "file for message ${msg_id} already exists. skipping." | ||
97 … | + echo "message ${topic_id}${msg_id} already downloaded. skipping." | ||
94 | 98 … | else | |
95 | - req "${group_id}/${topic_id}/${msg_id}" > "$path" | ||
99 … | + echo "message ${topic_id}${msg_id} downloading." | ||
100 … | + temp=$(mktemp) | ||
101 … | + if req "message/raw?msg=${group_id}/${topic_id}/${msg_id}" -o "$temp" | ||
102 … | + then | ||
103 … | + mv "$temp" "$path" | ||
104 … | + else | ||
105 … | + echo "message ${topic_id}${msg_id} failed to download." >&2 | ||
106 … | + rm "$temp" | ||
107 … | + fi | ||
96 | 108 … | fi | |
97 | 109 … | } | |
98 | 110 … | ||
99 | 111 … | download_messages() { |
Built with git-ssb-web