Commit df96375f367727303549e4a1bf734ee6279a6b25
add offset log iterator.
Daan Wynen committed on 6/1/2020, 10:33:44 PMParent: 88d12a6f5c74156aa026e14c5ebc0eea12062e9b
Files changed
offsetlog.py | added |
offsetlog.py | ||
---|---|---|
@@ -1,0 +1,67 @@ | ||
1 … | +class OffsetLog: | |
2 … | + | |
3 … | + def __init__(self, filename, mode='r', num_bytes_filesize=4): | |
4 … | + self.filename = filename | |
5 … | + self.num_bytes_filesize = num_bytes_filesize | |
6 … | + if mode not in 'rwa': | |
7 … | + raise ValueError('mode needs to be "r", "w", or "a"') | |
8 … | + if mode != 'r': | |
9 … | + raise NotImplementedError('Write access not implemented yet.') | |
10 … | + self.mode = mode | |
11 … | + self.fp = None | |
12 … | + | |
13 … | + def open(self): | |
14 … | + self.fp = open(self.filename, f'{self.mode}b') | |
15 … | + self.current_offset = 0 | |
16 … | + self.size_before = None | |
17 … | + self.size_after = None | |
18 … | + | |
19 … | + def close(self): | |
20 … | + self.fp.close() | |
21 … | + | |
22 … | + def __enter__(self): | |
23 … | + self.open() | |
24 … | + return self | |
25 … | + | |
26 … | + def __exit__(self, exc_type, exc_value, traceback): | |
27 … | + self.close() | |
28 … | + | |
29 … | + def retrieve_after(self, offset=None): | |
30 … | + if offset is not None: | |
31 … | + self.fp.seek(offset) | |
32 … | + else: | |
33 … | + offset = self.fp.tell() | |
34 … | + pre_bytes = self.fp.read(4) | |
35 … | + if len(pre_bytes) == 0: | |
36 … | + return None | |
37 … | + entry_size_pre = int.from_bytes(pre_bytes, 'big', signed=False) | |
38 … | + content = self.fp.read(entry_size_pre) | |
39 … | + entry_size_post = int.from_bytes(self.fp.read(4), 'big', signed=False) | |
40 … | + file_size_post = int.from_bytes(self.fp.read(self.num_bytes_filesize), | |
41 … | + 'big', signed=False) | |
42 … | + post_tell = self.fp.tell() | |
43 … | + if file_size_post != post_tell: | |
44 … | + raise Exception(f"Invalid state after reading entry at offset {offset}." | |
45 … | + f" Entry metadata indicates offset {file_size_post} for" | |
46 … | + f" the following entry, but actual offset is now " | |
47 … | + f"{post_tell}.") | |
48 … | + if entry_size_pre != entry_size_post: | |
49 … | + raise Exception(f"Pre- and Post-append byte counts for messages at " | |
50 … | + f"{offset} don't match! Pre: {entry_size_pre} " | |
51 … | + f"Post: {entry_size_post}.") | |
52 … | + return content, entry_size_pre, file_size_post | |
53 … | + | |
54 … | + def _iter_forward(self): | |
55 … | + current_offset = 0 | |
56 … | + while True: | |
57 … | + block = self.retrieve_after(current_offset) | |
58 … | + if block is None: | |
59 … | + return | |
60 … | + content, size, current_offset = block | |
61 … | + if set(block[0]) == set(b'\x00'): | |
62 … | + continue | |
63 … | + current_offset = block[2] | |
64 … | + yield block | |
65 … | + | |
66 … | + def __iter__(self): | |
67 … | + return self._iter_forward() |
Built with git-ssb-web