diff --git a/archive/archive.py b/archive/archive.py index fc525fb..bd16bd4 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -1,5 +1,5 @@ import email, email.parser -import os, json, gzip, re +import os, json, gzip, re, logging import mysql.connector as mariadb import archive.sql, archive.util from datetime import date, datetime @@ -51,9 +51,9 @@ def connect_db(database, host, user, password): try: con = mariadb.connect(host=host, user=user, password=password, database=database) except mariadb.Error as error: - print("Error: {}".format(error)) + logging.error("Error: {}".format(error)) if error.errno == 1049: - print("Database " + database + " does not exist.") + logging.error("Database " + database + " does not exist.") return None finally: return con @@ -73,15 +73,13 @@ def list_tables_db(database, host, user, password): return results except mariadb.Error as error: - print("Error: {}".format(error)) + logging.error("Error: {}".format(error)) finally: cursor.close() con.close() - - - - +def list_tables_db_config(config): + return list_tables_db(config['database'], config['host'], config['user'], config['password']) class Archive: @@ -93,10 +91,10 @@ class Archive: # this is twisted................ two constructors... dumb if isinstance(config, str): # need a filename or a dir name.... 
- print("reading archive " + archive_name, end='') + logging.info("reading archive " + archive_name) archive_dir = config (self.data, self.archive_name) = load_from_file(archive_name, archive_name, archive_dir) - print(" - done.") + logging.info(" - done.") elif isinstance(config, dict): self.archive_name = archive_name self.db_con = connect_db(config['database'], config['host'], config['user'], config['password']) @@ -116,24 +114,24 @@ class Archive: def create_db(self, config=None): - print("creating table: " + self.archive_name, end='') + logging.info("creating table: " + self.archive_name) if self.db_con is None: if config is not None: self.db_con = connect_db(config['database'], config['host'], config['user'], config['password']) if self.db_con is None: - print(" - no connection... Aborting.") + logging.warning(" - no connection... Aborting.") return try: cursor = self.db_con.cursor() cursor.execute(archive.sql.CREATE.format(self.archive_name)) except mariadb.Error as error: - print("Error: {}".format(error)) + logging.error("Error: {}".format(error)) finally: cursor.close() - print(" - done.") + logging.info(" - done.") def insert_db(self, config=None): @@ -151,9 +149,9 @@ class Archive: progress = terminal.progress.ProgressBar(self.archive_name, len(self.data), fmt=terminal.progress.ProgressBar.FULL) for t in self.data: - + n_inserted = self.recursive_insert_db(cursor, t["threads"]) - # print(" - insert: " + str(n_inserted), end='') + logging.info(" - " + str(n_inserted)) if n_inserted > 0: self.db_con.commit() @@ -164,7 +162,7 @@ class Archive: self.db_con.commit() except mariadb.Error as error: - print("Error: {}".format(error)) + logging.error("Error: {}".format(error)) pass finally: cursor.close() @@ -175,13 +173,15 @@ class Archive: for m in thread: try: + logging.info(" - in - " + m['date'] + " " + m['subject']) + from_ = archive.util.format_from(m) author_name_ = archive.util.format_author(m) to_ = archive.util.format_to(m) date_ = 
archive.util.format_date(m, self.archive_name) if date_ is None or from_ is None: - print("\nDATE ERROR: " + m['from'] + " - " + m['date']) + logging.warning("\nDATE ERROR: " + m['from'] + " - " + m['date']) continue @@ -197,10 +197,11 @@ class Archive: if error.errno == 1062: #duplication continue <------------------------- look this up... # print("\nError: {}".format(error)) + logging.info("+++db+++ duplicate") continue else: - print("\nError: {}".format(error)) - print(str_insert) + logging.error("\nError: {}".format(error)) + logging.error(str_insert) continue return n_inserted @@ -208,7 +209,7 @@ class Archive: def content_search(self, term, bool=True): if self.db_con is None: - print("Not connection to database...") + logging.warning("Not connection to database...") return try: @@ -226,14 +227,14 @@ class Archive: return results except mariadb.Error as error: - print("Error: {}".format(error)) + logging.error("Error: {}".format(error)) finally: cursor.close() def from_search(self, term, bool=True): if self.db_con is None: - print("Not connection to database...") + logging.warning("Not connection to database...") return try: @@ -251,7 +252,7 @@ class Archive: return results except mariadb.Error as error: - print("Error: {}".format(error)) + logging.error("Error: {}".format(error)) finally: cursor.close() diff --git a/archive/sql.py b/archive/sql.py index 46a1a25..ad6d63a 100644 --- a/archive/sql.py +++ b/archive/sql.py @@ -12,6 +12,9 @@ CREATE = "CREATE TABLE `{}` (" \ "FULLTEXT (`from_`, `author_name_`)" \ ") ENGINE = InnoDB;" +# FULLTEXT manual +# ALTER TABLE tableName ADD FULLTEXT(columnA, columnB); + INSERT = ("INSERT INTO {}" "(from_, author_name_, to_, subject_, date_, content_type_, content_, url_) " "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)") diff --git a/archive/util.py b/archive/util.py index ca4ede1..5ebed8e 100755 --- a/archive/util.py +++ b/archive/util.py @@ -215,7 +215,7 @@ def format_id(msg, archive_name): # format='%d/%m/%Y' def 
min_date(archive_name): - if "nettime" in archive_name: + if archive_name == "nettime_l": return '01/10/1995' elif archive_name == "spectre": return '01/08/2001' @@ -223,3 +223,8 @@ def min_date(archive_name): return '01/01/2002' elif archive_name == "crumb": return '01/02/2001' + elif archive_name == "oldboys": + return '01/03/2001' + elif archive_name == "nettime_bold": + return '01/01/2000' + diff --git a/index.py b/index.py index cb3169e..165cf31 100644 --- a/index.py +++ b/index.py @@ -2,6 +2,7 @@ import os, logging, argparse from glob import glob import archive.archive as archive import config +import terminal.util as util logging.basicConfig(level=logging.DEBUG) @@ -10,25 +11,32 @@ def list_archives(archives_dir): def run(lists, archives): logging.debug("indexing: " + str(lists) + " from " + archives) + lists_db = archive.list_tables_db_config(config.db) for a in lists: ar = archive.Archive(a, archives) + if a not in lists_db: + if util.y_n_question("Archive " + a + " db table does not exist. Create it?"): + ar.create_db(config.db) + else: + logging.info("Table not created. Aborting.") + return ar.insert_db(config.db) if __name__ == "__main__": - p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!') - p.add_argument('list', metavar="list", help="list(s) to index", nargs="+") - p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives) + p = argparse.ArgumentParser(description='Mailinglists are dead. 
Long live mailinglists!') + p.add_argument('list', metavar="list", help="list(s) to index", nargs="+") + p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives) - args = p.parse_args() + args = p.parse_args() - if not args.archives: - args.archives = config.archives + if not args.archives: + args.archives = config.archives - if len(args.list) == 1 and args.list[0] == "all": - args.list = list_archives(args.archives) + if len(args.list) == 1 and args.list[0] == "all": + args.list = list_archives(args.archives) - run(args.list, args.archives) + run(args.list, args.archives) diff --git a/terminal/util.py b/terminal/util.py index 695b671..e43a492 100644 --- a/terminal/util.py +++ b/terminal/util.py @@ -1,3 +1,4 @@ +import sys def y_n_question(question_str):