Files: 6a24c182e6c1cf33a112af72fc079275c0ef9153 / generator / parse_content.py
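"""Parse content entries for the generator at import time.

Loads every Markdown (.md) and Gemini (.gmi) file with front matter from the
``content`` directory and exposes, as module-level names: ``entries`` (ordered
by date), the deduplicated ``series`` and ``tags`` lists, the ``years`` /
``months`` / ``days`` archive buckets, and ``updated``, the newest publish or
modification timestamp.
"""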
import datetime
import frontmatter
import os
import re
import unicodedata


input_dir = "content"


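# slugify() folds a string to plain ASCII and deletes every non-word character,
# e.g. slugify("Hello, World!") == "helloworld".  Spaces are removed rather
# than replaced with hyphens, so slugs run words together.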
def slugify(string):
    return re.sub(
        r"\W+",
        "",
        unicodedata.normalize("NFKD", string)
        .encode("ascii", "ignore")
        .decode("ascii")
        .lower(),
    )


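# sortable_title() produces a sort key that ignores a leading "The ", so
# e.g. sortable_title("The Matrix") == "matrix" and the title sorts under M.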
def sortable_title(string):
    return re.sub(r"^the ", "", string, flags=re.IGNORECASE).lower()


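# parse_entry_data() normalises an entry's front matter in place: "date" and
# "modified" are parsed as UTC datetimes (the code appends "+00:00", so the
# front matter must supply a plain ISO 8601 string with no offset of its own),
# string fields are stripped, "tags" is split on commas, and "dir" is recorded
# when a matching asset directory content/<id>/ exists.  A hypothetical entry
# file content/hello-world.md might therefore start with:
#
#   ---
#   title: Hello, World!
#   date: "2021-01-01T12:00:00"
#   tags: python, meta
#   ---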
def parse_entry_data(entry_data):
    entry_data["date"] = datetime.datetime.fromisoformat(entry_data["date"] + "+00:00")
    if "description" in entry_data:
        entry_data["description"] = entry_data["description"].strip()
    if "links" in entry_data:
        for link in entry_data["links"]:
            link["title"] = link["title"].strip()
            if "description" in link:
                if link["description"].strip() != "":
                    link["description"] = link["description"].strip()
                else:
                    del link["description"]
            clean_link = dict((k, link[k]) for k in link)
    if "modified" in entry_data:
        entry_data["modified"] = datetime.datetime.fromisoformat(
            entry_data["modified"] + "+00:00"
        )
    if "tags" in entry_data:
        entry_data["tags"] = [tag.strip() for tag in entry_data["tags"].split(",")]
    if "series" in entry_data:
        entry_data["series"] = entry_data["series"].strip()
    entry_data["title"] = entry_data["title"].strip()
    if os.path.isdir(f"{input_dir}/{entry_data['id']}"):
        entry_data["dir"] = f"{input_dir}/{entry_data['id']}"


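# _entries() scans the content directory for .md/.gmi files, loads their front
# matter, uses the slugified filename stem as the entry id, skips anything
# without a "date", and returns the entries sorted by date (the ordering relies
# on dicts preserving insertion order, i.e. Python 3.7+).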
def _entries():
    entries = {}

    for filename in os.listdir(input_dir):
        if filename.endswith(".md") or filename.endswith(".gmi"):
            entry_data = frontmatter.load(f"{input_dir}/{filename}")
            entry_data["id"], _ = map(slugify, os.path.splitext(filename))
            if "date" in entry_data:
                parse_entry_data(entry_data)
                entries[entry_data["id"]] = entry_data
    entries = {
        id: entries[id]
        for id in sorted(entries.keys(), key=lambda entry: (entries[entry]["date"]))
    }
    return entries


entries = _entries()


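# _series() walks the entries newest-first (reversed() over dict views needs
# Python 3.8+) and collects each series name once, comparing slugified forms so
# differently punctuated spellings count as the same series; the spelling from
# the newest entry wins, and the result is sorted with sortable_title().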
def _series():
    series = []
    for entry_data in reversed(entries.values()):
        if "series" in entry_data and slugify(entry_data["series"]) not in map(
            slugify, series
        ):
            series += [entry_data["series"].strip()]
    return sorted(series, key=sortable_title)


series = _series()


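# _tags() gathers the unique tags across all entries, deduplicated on their
# slugified form like _series().  The "not in" check only sees tags collected
# from earlier entries, so a tag repeated within a single entry's own list
# would be added twice.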
def _tags():
    tags = []
    for entry_data in entries.values():
        if "tags" in entry_data:
            tags += [
                tag.strip()
                for tag in entry_data["tags"]
                if slugify(tag) not in map(slugify, tags)
            ]
    return sorted(tags, key=sortable_title)


tags = _tags()

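# Archive buckets: naive datetimes truncated to the start of every year, month
# and day that has at least one entry, plus "updated", the most recent publish
# or modification timestamp across all entries.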
years = sorted(
    set([datetime.datetime(entry["date"].year, 1, 1) for entry in entries.values()])
)

months = sorted(
    set(
        [
            datetime.datetime(entry["date"].year, entry["date"].month, 1)
            for entry in entries.values()
        ]
    )
)

days = sorted(
    set(
        [
            datetime.datetime(
                entry["date"].year, entry["date"].month, entry["date"].day
            )
            for entry in entries.values()
        ]
    )
)

updated = sorted(
    [entry["date"] for entry in entries.values()]
    + [entry["modified"] for entry in entries.values() if "modified" in entry]
)[-1]

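
# A quick sanity-check sketch for running this module directly; the rest of the
# generator is assumed to simply import the module-level names defined above.
if __name__ == "__main__":
    print(f"{len(entries)} entries, {len(series)} series, {len(tags)} tags")
    print(f"last updated: {updated.isoformat()}")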