Files: e9e0d6d17453d5cf4876648c94e0eb11f6e9427b / generator / parse_content.py
3315 bytesRaw
1 | import datetime |
2 | import frontmatter |
3 | import os |
4 | import re |
5 | import unicodedata |
6 | |
7 | |
# Directory, relative to the current working directory, that is scanned for
# entry files and for per-entry subdirectories named after the entry id.
input_dir = "content"
9 | |
10 | |
def slugify(string):
    """Reduce *string* to a lowercase ASCII slug.

    The text is NFKD-normalised so accented letters decompose into a base
    letter plus combining marks, everything that cannot be encoded as ASCII
    is dropped, the result is lower-cased, and every run of non-word
    characters is removed (not replaced). Underscores count as word
    characters and are therefore kept.
    """
    decomposed = unicodedata.normalize("NFKD", string)
    ascii_text = decomposed.encode("ascii", "ignore").decode("ascii")
    lowered = ascii_text.lower()
    return re.sub(r"\W+", "", lowered)
20 | |
21 | |
def parse_entry_data(entry_data):
    """Normalise the metadata of one entry in place.

    ``entry_data`` is a mapping-like object (in this file, whatever
    ``frontmatter.load`` returns) that must already contain the keys
    ``"date"``, ``"title"`` and ``"id"``.

    Changes applied:

    * ``"date"`` and, when present, ``"modified"`` are parsed with
      ``datetime.fromisoformat`` after ``"+00:00"`` is appended, so the
      stored strings are expected to carry no UTC offset and the results
      are timezone-aware UTC datetimes.
    * ``"description"``, ``"series"`` and ``"title"`` are stripped of
      surrounding whitespace.
    * For every item of ``"links"``: the title is stripped; the description
      is stripped, or removed entirely when it is blank.
    * ``"tags"`` is split on commas into a list of stripped tags; blank
      items (e.g. from a trailing comma) are dropped.
    * ``"dir"`` is set to ``"<input_dir>/<id>"`` when that directory exists.

    Returns ``None``. A missing required key or a malformed timestamp
    propagates as the underlying exception.
    """
    entry_data["date"] = datetime.datetime.fromisoformat(entry_data["date"] + "+00:00")
    if "description" in entry_data:
        entry_data["description"] = entry_data["description"].strip()
    if "links" in entry_data:
        for link in entry_data["links"]:
            link["title"] = link["title"].strip()
            if "description" in link:
                description = link["description"].strip()
                if description:
                    link["description"] = description
                else:
                    # A whitespace-only description is treated as absent.
                    del link["description"]
    if "modified" in entry_data:
        entry_data["modified"] = datetime.datetime.fromisoformat(
            entry_data["modified"] + "+00:00"
        )
    if "tags" in entry_data:
        stripped_tags = (tag.strip() for tag in entry_data["tags"].split(","))
        # "a, b," would otherwise yield an empty third tag.
        entry_data["tags"] = [tag for tag in stripped_tags if tag]
    if "series" in entry_data:
        entry_data["series"] = entry_data["series"].strip()
    entry_data["title"] = entry_data["title"].strip()
    entry_dir = f"{input_dir}/{entry_data['id']}"
    if os.path.isdir(entry_dir):
        entry_data["dir"] = entry_dir
46 | |
47 | |
def _entries():
    """Load every dated entry found directly inside ``input_dir``.

    Files ending in ``.md`` or ``.gmi`` are read with ``frontmatter.load``.
    Each entry's ``"id"`` is the slugified file name without its extension.
    Files whose front matter has no ``"date"`` are skipped; the rest are
    normalised by ``parse_entry_data``.

    Returns a dict mapping id -> entry, ordered by ascending ``"date"``.
    When two files slugify to the same id, the one whose file name sorts
    later replaces the earlier one.
    """
    loaded = {}

    # os.listdir() returns names in arbitrary order; sorting keeps id
    # collisions and same-date ties reproducible between runs.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith((".md", ".gmi")):
            continue
        entry_data = frontmatter.load(f"{input_dir}/{filename}")
        stem, _extension = os.path.splitext(filename)
        entry_data["id"] = slugify(stem)
        if "date" not in entry_data:
            continue
        parse_entry_data(entry_data)
        loaded[entry_data["id"]] = entry_data

    # sorted() is stable, so entries sharing a date keep their load order.
    by_date = sorted(loaded.items(), key=lambda item: item[1]["date"])
    return dict(by_date)
63 | |
64 | |
# Built once at import time (this reads the content directory); maps entry id
# to its parsed entry, ordered by ascending date.
entries = _entries()
66 | |
67 | |
def _series():
    """Return the distinct series names used by the entries.

    Entries are visited in reverse order of the module-level ``entries``
    dict, so a series appears at the position of its last entry in that
    dict. Two names are considered the same series when their slugs match;
    the first spelling encountered is the one returned (stripped).
    """
    names = []
    seen_slugs = set()  # slugs already emitted; avoids re-slugifying `names`
    for entry_data in reversed(entries.values()):
        if "series" not in entry_data:
            continue
        slug = slugify(entry_data["series"])
        if slug in seen_slugs:
            continue
        seen_slugs.add(slug)
        names.append(entry_data["series"].strip())
    return names
76 | |
77 | |
# Distinct series names, computed once at import time from `entries`.
series = _series()
79 | |
80 | |
def _tags():
    """Return every distinct tag used by the entries, sorted.

    Tags are compared by slug, so spellings that slugify identically count
    as one tag; the first spelling encountered while walking the
    module-level ``entries`` dict is kept (stripped). The result uses the
    default string sort order.
    """
    unique = []
    # Tracked incrementally so that duplicates inside a single entry's tag
    # list are caught too, not only duplicates across entries.
    seen_slugs = set()
    for entry_data in entries.values():
        if "tags" not in entry_data:
            continue
        for tag in entry_data["tags"]:
            slug = slugify(tag)
            if slug in seen_slugs:
                continue
            seen_slugs.add(slug)
            unique.append(tag.strip())
    return sorted(unique)
91 | |
92 | |
# Sorted list of distinct tags, computed once at import time from `entries`.
tags = _tags()
94 | |
# Calendar buckets that contain at least one entry, each in ascending order.
# Every bucket is a naive datetime (no tzinfo) at midnight on the first
# instant of the period, built from the year/month/day of the entry's "date".

# One value per year: January 1st.
years = sorted(
    {datetime.datetime(entry["date"].year, 1, 1) for entry in entries.values()}
)

# One value per (year, month): the 1st of that month.
months = sorted(
    {
        datetime.datetime(stamp.year, stamp.month, 1)
        for stamp in (entry["date"] for entry in entries.values())
    }
)

# One value per calendar day.
days = sorted(
    {
        datetime.datetime(stamp.year, stamp.month, stamp.day)
        for stamp in (entry["date"] for entry in entries.values())
    }
)
118 | |
# Most recent timestamp among every entry's "date" and, where present,
# "modified". None when there are no entries at all, instead of failing at
# import time with an IndexError.
updated = max(
    [entry["date"] for entry in entries.values()]
    + [entry["modified"] for entry in entries.values() if "modified" in entry],
    default=None,
)
123 |
Built with git-ssb-web