archiving
This commit is contained in:
parent
e3641ec1ad
commit
cb93e046af
@ -1,5 +1,5 @@
|
||||
import email, email.parser
|
||||
import os, json, gzip, re
|
||||
import os, json, gzip, re, logging
|
||||
import mysql.connector as mariadb
|
||||
import archive.sql, archive.util
|
||||
from datetime import date, datetime
|
||||
@ -51,9 +51,9 @@ def connect_db(database, host, user, password):
|
||||
try:
|
||||
con = mariadb.connect(host=host, user=user, password=password, database=database)
|
||||
except mariadb.Error as error:
|
||||
print("Error: {}".format(error))
|
||||
logging.error("Error: {}".format(error))
|
||||
if error.errno == 1049:
|
||||
print("Database " + database + " does not exist.")
|
||||
logging.error("Database " + database + " does not exist.")
|
||||
return None
|
||||
finally:
|
||||
return con
|
||||
@ -73,15 +73,13 @@ def list_tables_db(database, host, user, password):
|
||||
return results
|
||||
|
||||
except mariadb.Error as error:
|
||||
print("Error: {}".format(error))
|
||||
logging.error("Error: {}".format(error))
|
||||
finally:
|
||||
cursor.close()
|
||||
con.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def list_tables_db_config(config):
    """Call list_tables_db with connection settings taken from a mapping.

    :param config: mapping that provides the 'database', 'host', 'user'
        and 'password' entries.
    :return: whatever list_tables_db returns for those settings.
    """
    # Keys are read in the positional order list_tables_db expects.
    settings = [config[key] for key in ('database', 'host', 'user', 'password')]
    return list_tables_db(*settings)
|
||||
|
||||
class Archive:
|
||||
|
||||
@ -93,10 +91,10 @@ class Archive:
|
||||
# this is twisted................ two constructors... dumb
|
||||
if isinstance(config, str):
|
||||
# need a filename or a dir name....
|
||||
print("reading archive " + archive_name, end='')
|
||||
logging.info("reading archive " + archive_name)
|
||||
archive_dir = config
|
||||
(self.data, self.archive_name) = load_from_file(archive_name, archive_name, archive_dir)
|
||||
print(" - done.")
|
||||
logging.info(" - done.")
|
||||
elif isinstance(config, dict):
|
||||
self.archive_name = archive_name
|
||||
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
|
||||
@ -116,24 +114,24 @@ class Archive:
|
||||
|
||||
def create_db(self, config=None):
|
||||
|
||||
print("creating table: " + self.archive_name, end='')
|
||||
logging.info("creating table: " + self.archive_name)
|
||||
if self.db_con is None:
|
||||
if config is not None:
|
||||
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
|
||||
|
||||
if self.db_con is None:
|
||||
print(" - no connection... Aborting.")
|
||||
logging.warning(" - no connection... Aborting.")
|
||||
return
|
||||
|
||||
try:
|
||||
cursor = self.db_con.cursor()
|
||||
cursor.execute(archive.sql.CREATE.format(self.archive_name))
|
||||
except mariadb.Error as error:
|
||||
print("Error: {}".format(error))
|
||||
logging.error("Error: {}".format(error))
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
print(" - done.")
|
||||
logging.info(" - done.")
|
||||
|
||||
|
||||
def insert_db(self, config=None):
|
||||
@ -151,9 +149,9 @@ class Archive:
|
||||
progress = terminal.progress.ProgressBar(self.archive_name, len(self.data), fmt=terminal.progress.ProgressBar.FULL)
|
||||
|
||||
for t in self.data:
|
||||
|
||||
|
||||
n_inserted = self.recursive_insert_db(cursor, t["threads"])
|
||||
# print(" - insert: " + str(n_inserted), end='')
|
||||
logging.info(" - " + str(n_inserted))
|
||||
if n_inserted > 0:
|
||||
self.db_con.commit()
|
||||
|
||||
@ -164,7 +162,7 @@ class Archive:
|
||||
self.db_con.commit()
|
||||
|
||||
except mariadb.Error as error:
|
||||
print("Error: {}".format(error))
|
||||
logging.error("Error: {}".format(error))
|
||||
pass
|
||||
finally:
|
||||
cursor.close()
|
||||
@ -175,13 +173,15 @@ class Archive:
|
||||
for m in thread:
|
||||
try:
|
||||
|
||||
logging.info(" - in - " + m['date'] + " " + m['subject'])
|
||||
|
||||
from_ = archive.util.format_from(m)
|
||||
author_name_ = archive.util.format_author(m)
|
||||
to_ = archive.util.format_to(m)
|
||||
date_ = archive.util.format_date(m, self.archive_name)
|
||||
|
||||
if date_ is None or from_ is None:
|
||||
print("\nDATE ERROR: " + m['from'] + " - " + m['date'])
|
||||
logging.warning("\nDATE ERROR: " + m['from'] + " - " + m['date'])
|
||||
continue
|
||||
|
||||
|
||||
@ -197,10 +197,11 @@ class Archive:
|
||||
if error.errno == 1062:
|
||||
#duplication continue <------------------------- look this up...
|
||||
# print("\nError: {}".format(error))
|
||||
logging.info("+++db+++ duplicate")
|
||||
continue
|
||||
else:
|
||||
print("\nError: {}".format(error))
|
||||
print(str_insert)
|
||||
logging.error("\nError: {}".format(error))
|
||||
logging.error(str_insert)
|
||||
continue
|
||||
|
||||
return n_inserted
|
||||
@ -208,7 +209,7 @@ class Archive:
|
||||
def content_search(self, term, bool=True):
|
||||
|
||||
if self.db_con is None:
|
||||
print("Not connection to database...")
|
||||
logging.warning("Not connection to database...")
|
||||
return
|
||||
|
||||
try:
|
||||
@ -226,14 +227,14 @@ class Archive:
|
||||
return results
|
||||
|
||||
except mariadb.Error as error:
|
||||
print("Error: {}".format(error))
|
||||
logging.error("Error: {}".format(error))
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
def from_search(self, term, bool=True):
|
||||
|
||||
if self.db_con is None:
|
||||
print("Not connection to database...")
|
||||
logging.warning("Not connection to database...")
|
||||
return
|
||||
|
||||
try:
|
||||
@ -251,7 +252,7 @@ class Archive:
|
||||
return results
|
||||
|
||||
except mariadb.Error as error:
|
||||
print("Error: {}".format(error))
|
||||
logging.error("Error: {}".format(error))
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
|
||||
@ -12,6 +12,9 @@ CREATE = "CREATE TABLE `{}` (" \
|
||||
"FULLTEXT (`from_`, `author_name_`)" \
|
||||
") ENGINE = InnoDB;"
|
||||
|
||||
# FULLTEXT manual
|
||||
# ALTER TABLE tableName ADD FULLTEXT(columnA, columnB);
|
||||
|
||||
INSERT = ("INSERT INTO {}"
|
||||
"(from_, author_name_, to_, subject_, date_, content_type_, content_, url_) "
|
||||
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
|
||||
|
||||
@ -215,7 +215,7 @@ def format_id(msg, archive_name):
|
||||
|
||||
# format='%d/%m/%Y'
|
||||
def min_date(archive_name):
|
||||
if "nettime" in archive_name:
|
||||
if archive_name == "nettime_l":
|
||||
return '01/10/1995'
|
||||
elif archive_name == "spectre":
|
||||
return '01/08/2001'
|
||||
@ -223,3 +223,8 @@ def min_date(archive_name):
|
||||
return '01/01/2002'
|
||||
elif archive_name == "crumb":
|
||||
return '01/02/2001'
|
||||
elif archive_name == "oldboys":
|
||||
return '01/03/2001'
|
||||
elif archive_name == "nettime_bold":
|
||||
return '01/01/2000'
|
||||
|
||||
|
||||
26
index.py
26
index.py
@ -2,6 +2,7 @@ import os, logging, argparse
|
||||
from glob import glob
|
||||
import archive.archive as archive
|
||||
import config
|
||||
import terminal.util as util
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
@ -10,25 +11,32 @@ def list_archives(archives_dir):
|
||||
|
||||
def run(lists, archives):
    """Index the given mailing-list archives into the database.

    Each name in ``lists`` is loaded as an ``archive.Archive`` from the
    ``archives`` directory. If the database has no table with that name the
    user is asked whether to create it; declining stops the whole run.
    Otherwise the archive is inserted via ``Archive.insert_db``.

    :param lists: iterable of archive (list) names to index.
    :param archives: path of the directory that holds the archive files.
    :return: None.
    """
    # Lazy %-style arguments: no TypeError when `archives` is not a str.
    logging.debug("indexing: %s from %s", lists, archives)

    lists_db = archive.list_tables_db_config(config.db)
    if lists_db is None:
        # list_tables_db logs a database error and returns nothing; the
        # membership test below would then raise TypeError on None.
        logging.error("Could not list database tables. Aborting.")
        return

    for a in lists:
        ar = archive.Archive(a, archives)
        if a not in lists_db:
            if util.y_n_question("Archive " + a + " db table does not exist. Create it?"):
                ar.create_db(config.db)
            else:
                logging.info("Table not created. Aborting.")
                return
        ar.insert_db(config.db)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
|
||||
p.add_argument('list', metavar="list", help="list(s) to index", nargs="+")
|
||||
p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)
|
||||
p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
|
||||
p.add_argument('list', metavar="list", help="list(s) to index", nargs="+")
|
||||
p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)
|
||||
|
||||
args = p.parse_args()
|
||||
args = p.parse_args()
|
||||
|
||||
if not args.archives:
|
||||
args.archives = config.archives
|
||||
if not args.archives:
|
||||
args.archives = config.archives
|
||||
|
||||
if len(args.list) == 1 and args.list[0] == "all":
|
||||
args.list = list_archives(args.archives)
|
||||
if len(args.list) == 1 and args.list[0] == "all":
|
||||
args.list = list_archives(args.archives)
|
||||
|
||||
run(args.list, args.archives)
|
||||
run(args.list, args.archives)
|
||||
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import sys
|
||||
|
||||
def y_n_question(question_str):
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user