thomas-dev-patchwork / MediaProcessor



Commit b46e4ec37c3250819bcfec6a303708c4994256a0

added readme.md and dumpDB2html.py files

Thomas Freedman committed on 2/13/2023, 8:38:49 PM
Parent: 762c108c6dc008453fbff099340086a31f6f17a5

Files changed

dumpDB2html.py   added
readme.md        added
dumpDB2html.py
@@ -1,0 +1,120 @@
+#!/usr/bin/python3
+#
+# Program to dump a MediaProcessor DB into a folder of html files
+#
+# This will create an index.html file that has links to all files
+# produced by this program. Each file is named after the IPFS hash
+# of the media content and contains all of the metadata as a
+# hidden set of name/value pairs, with a few visible lines for
+# display in a web browser.
+#
+import datetime
+import sqlite3
+import time
+import sys
+import os
+
+def usage():
+  cmd = sys.argv[0]
+  msg = "\nCreates a file for each row of the target SQLite database.\n"
+  msg += "Usage: " + cmd + " <-d sqlite DB file> <-f folder> [-s since]\n\n"
+  print(msg)
+  exit(0)
+
+HTML1 = "<!DOCTYPE html>\n\t<html>\n\t\t<head>\n\t\t\t<title>"
+HTML2 = "</title>\n\t\t</head>\n\t\t<body>\n"
+HTML3 = "</pre>\n</body>\n</html>\n"
+
+#############################################################################
+#                                                                           #
+#                                   main                                    #
+#                                                                           #
+#############################################################################
+argc = len(sys.argv)
+cols = []
+conn = sql = row = url = since = None
+
+try:
+  # Parse command line
+  if argc > 4:
+    # Required parameter: SQLite database file to check pins against
+    if sys.argv[1] == "-d":
+      dbFile = sys.argv[2]
+      conn = sqlite3.connect(dbFile) # Failure to open raises an exception
+      conn.row_factory = sqlite3.Row # Return rows as dictionary-like objects
+    else: usage()
+
+    # Required parameter: folder where files will be written
+    if sys.argv[3] == "-f":
+      folder = sys.argv[4]
+      if not os.path.exists(folder):
+        os.mkdir(folder)
+    else: usage()
+
+    # Optional parameter: since date
+    if argc == 7:
+      if sys.argv[5] == "-s":
+        try:
+          since = sys.argv[6]
+          datetime.datetime.strptime(since, '%Y-%m-%d')
+        except Exception:
+          print("Incorrect date format, must be YYYY-MM-DD")
+          exit(0)
+      else: usage()
+  else: usage()
+
+  # Make an index.html file with links to all files generated
+  with open(folder + "/index.html", 'w') as idx:
+    idx.write(HTML1 + "IPFS Hash Index" + HTML2 + "<pre>\n")
+    sql = "SELECT vhash FROM IPFS_HASH_INDEX ORDER BY grupe, sqlts"
+    rows = (conn.cursor().execute(sql)).fetchall()
+    for row in range(0, len(rows)):
+      vhash = rows[row]['vhash']
+      if vhash and len(vhash) == 46:
+        idx.write(f"<a href='{vhash}.html'>{vhash}</a> ")
+        if (row + 1) % 3 == 0: idx.write('\n')
+    idx.write(HTML3)
+
+  # Create the files, with contents being all metadata fields & values.
+  # Display a few items at top of file and leave most hidden.
+  sql = "SELECT * FROM IPFS_HASH_INDEX "
+  rows = conn.cursor().execute(sql + "limit 1;")
+  for col in rows.description: # First get column names
+    cols.append(col[0])
+
+  if since: sql += f"WHERE sqlts >= '{since}' "
+  sql += "ORDER BY grupe, sqlts"
+
+  rows = (conn.cursor().execute(sql)).fetchall() # Now get the column values
+  fiveSpaces = "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
+  for row in rows:
+    vhash = row['vhash']
+    if not vhash or len(vhash) < 46: continue
+    file = folder + '/' + row['vhash'] + ".html"
+    with open(file, 'w') as f:
+      f.write(HTML1 + row['title'] + HTML2)
+      ipfs = f"http://127.0.0.1:8080/ipfs/{row['vhash']}"
+      url = f"<a href='{row['webpage_url']}'>{row['webpage_url']}</a>"
+      f.write(f"\t\t\t<h3><a href='{ipfs}'>Open on IPFS</a></h3>\n")
+      f.write(f"\t\t\t<strong>Title:</strong>{fiveSpaces} {row['title']}<br>\n")
+      f.write(f"\t\t\t<strong>Source:</strong>{fiveSpaces} {url}<br>\n")
+      f.write("\t\t\t<strong>Description:</strong>\n")
+      f.write(f"\t\t\t<p>{row['description']}</p>\n")
+
+      # Remainder of columns will only be visible with "show page source"
+      f.write("<pre style='display:none'>\n")
+      for col in cols:
+        f.write(f"{col} = {row[col]}\n")
+      f.write(HTML3)
+
+# OperationalError must precede sqlite3.Error: it is a subclass of
+# sqlite3.Error and would otherwise never be reached.
+except sqlite3.OperationalError:
+  print("ERROR!: Query Failure")
+
+except sqlite3.Error as e:
+  print("Database error during query: %s\nSQL=%s\n\n" % (e, sql))
+
+except Exception as e:
+  print("Exception: %s\n\n" % e)
+
+exit(0)
+
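
For reference, a hypothetical invocation implied by the usage() string above (the database file and output folder names are placeholders):

    ./dumpDB2html.py -d MediaProcessor.db -f html -s 2023-01-01

This writes html/index.html plus one <vhash>.html page per qualifying row; the optional -s date limits output to rows whose sqlts timestamp falls on or after the given day.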
readme.md
@@ -1,0 +1,16 @@
+# Media Processor
+This repository contains code for the MediaProcessor Python 3 class and related software. It uses yt-dlp to scrape content, primarily (but not exclusively) audio and video, from all sources supported by yt-dlp, and adds that content to IPFS. It also obtains the JSON metadata for the content and adds it both to IPFS and to a SQLite database that serves as a search index.
+
+The SQLite database can be published on a website for search indexing using the dumpDB2html.py program. This is intended to provide a decentralized search index, via search software such as YaCy, for all content added to IPFS by computers that use this MediaProcessor class.
+
+---------------------------------
+***Requirements***
+
+See the imports at the top of the MediaProcessor.py class file for a list of the required Python 3 packages. Additional requirements:
+
+1. an IPFS daemon providing full command line support
+2. (optional) an email account that accepts SMTP connections
+3. (optional) a web server to publish an HTML version of the IPFS hashes and metadata
+4. (optional) a web crawler such as YaCy to provide a user interface to search for content
+5. (optional) IPFS Companion or IPFS Desktop to view content
+6. (optional) multiple IP addresses to use for downloading content
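
The HTML pages above are generated from the IPFS_HASH_INDEX table that dumpDB2html.py reads. As a minimal sketch of querying that search index directly, assuming the same table and columns (vhash, title, sqlts) seen in dumpDB2html.py; the database file name and search term are placeholders:

    #!/usr/bin/python3
    # Minimal sketch: search the SQLite index produced by MediaProcessor.
    # Assumes the IPFS_HASH_INDEX table and columns (vhash, title, sqlts)
    # used by dumpDB2html.py; "MediaProcessor.db" is a placeholder name.
    import sqlite3

    conn = sqlite3.connect("MediaProcessor.db")
    conn.row_factory = sqlite3.Row
    sql = ("SELECT vhash, title, sqlts FROM IPFS_HASH_INDEX "
           "WHERE title LIKE ? ORDER BY sqlts DESC")
    for row in conn.cursor().execute(sql, ('%search term%',)):
        if row['vhash'] and len(row['vhash']) == 46:  # 46-char hashes only, as in dumpDB2html.py
            print(f"http://127.0.0.1:8080/ipfs/{row['vhash']}  {row['title']}")
    conn.close()

The local gateway URL (port 8080) matches the one hard-coded in dumpDB2html.py; adjust it if your IPFS daemon serves its gateway elsewhere.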
