archiving
This commit is contained in:
parent
e3641ec1ad
commit
cb93e046af
@ -1,5 +1,5 @@
|
|||||||
import email, email.parser
|
import email, email.parser
|
||||||
import os, json, gzip, re
|
import os, json, gzip, re, logging
|
||||||
import mysql.connector as mariadb
|
import mysql.connector as mariadb
|
||||||
import archive.sql, archive.util
|
import archive.sql, archive.util
|
||||||
from datetime import date, datetime
|
from datetime import date, datetime
|
||||||
@ -51,9 +51,9 @@ def connect_db(database, host, user, password):
|
|||||||
try:
|
try:
|
||||||
con = mariadb.connect(host=host, user=user, password=password, database=database)
|
con = mariadb.connect(host=host, user=user, password=password, database=database)
|
||||||
except mariadb.Error as error:
|
except mariadb.Error as error:
|
||||||
print("Error: {}".format(error))
|
logging.error("Error: {}".format(error))
|
||||||
if error.errno == 1049:
|
if error.errno == 1049:
|
||||||
print("Database " + database + " does not exist.")
|
logging.error("Database " + database + " does not exist.")
|
||||||
return None
|
return None
|
||||||
finally:
|
finally:
|
||||||
return con
|
return con
|
||||||
@ -73,15 +73,13 @@ def list_tables_db(database, host, user, password):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
except mariadb.Error as error:
|
except mariadb.Error as error:
|
||||||
print("Error: {}".format(error))
|
logging.error("Error: {}".format(error))
|
||||||
finally:
|
finally:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
con.close()
|
con.close()
|
||||||
|
|
||||||
|
def list_tables_db_config(config):
    """List database tables using connection settings taken from ``config``.

    ``config`` must provide the 'database', 'host', 'user' and 'password'
    entries; they are forwarded, in that order, to ``list_tables_db`` and
    its result is returned unchanged.
    """
    connection_keys = ('database', 'host', 'user', 'password')
    return list_tables_db(*[config[key] for key in connection_keys])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Archive:
|
class Archive:
|
||||||
|
|
||||||
@ -93,10 +91,10 @@ class Archive:
|
|||||||
# this is twisted................ two constructors... dumb
|
# this is twisted................ two constructors... dumb
|
||||||
if isinstance(config, str):
|
if isinstance(config, str):
|
||||||
# need a filename or a dir name....
|
# need a filename or a dir name....
|
||||||
print("reading archive " + archive_name, end='')
|
logging.info("reading archive " + archive_name)
|
||||||
archive_dir = config
|
archive_dir = config
|
||||||
(self.data, self.archive_name) = load_from_file(archive_name, archive_name, archive_dir)
|
(self.data, self.archive_name) = load_from_file(archive_name, archive_name, archive_dir)
|
||||||
print(" - done.")
|
logging.info(" - done.")
|
||||||
elif isinstance(config, dict):
|
elif isinstance(config, dict):
|
||||||
self.archive_name = archive_name
|
self.archive_name = archive_name
|
||||||
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
|
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
|
||||||
@ -116,24 +114,24 @@ class Archive:
|
|||||||
|
|
||||||
def create_db(self, config=None):
|
def create_db(self, config=None):
|
||||||
|
|
||||||
print("creating table: " + self.archive_name, end='')
|
logging.info("creating table: " + self.archive_name)
|
||||||
if self.db_con is None:
|
if self.db_con is None:
|
||||||
if config is not None:
|
if config is not None:
|
||||||
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
|
self.db_con = connect_db(config['database'], config['host'], config['user'], config['password'])
|
||||||
|
|
||||||
if self.db_con is None:
|
if self.db_con is None:
|
||||||
print(" - no connection... Aborting.")
|
logging.warning(" - no connection... Aborting.")
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cursor = self.db_con.cursor()
|
cursor = self.db_con.cursor()
|
||||||
cursor.execute(archive.sql.CREATE.format(self.archive_name))
|
cursor.execute(archive.sql.CREATE.format(self.archive_name))
|
||||||
except mariadb.Error as error:
|
except mariadb.Error as error:
|
||||||
print("Error: {}".format(error))
|
logging.error("Error: {}".format(error))
|
||||||
finally:
|
finally:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
print(" - done.")
|
logging.info(" - done.")
|
||||||
|
|
||||||
|
|
||||||
def insert_db(self, config=None):
|
def insert_db(self, config=None):
|
||||||
@ -153,7 +151,7 @@ class Archive:
|
|||||||
for t in self.data:
|
for t in self.data:
|
||||||
|
|
||||||
n_inserted = self.recursive_insert_db(cursor, t["threads"])
|
n_inserted = self.recursive_insert_db(cursor, t["threads"])
|
||||||
# print(" - insert: " + str(n_inserted), end='')
|
logging.info(" - " + str(n_inserted))
|
||||||
if n_inserted > 0:
|
if n_inserted > 0:
|
||||||
self.db_con.commit()
|
self.db_con.commit()
|
||||||
|
|
||||||
@ -164,7 +162,7 @@ class Archive:
|
|||||||
self.db_con.commit()
|
self.db_con.commit()
|
||||||
|
|
||||||
except mariadb.Error as error:
|
except mariadb.Error as error:
|
||||||
print("Error: {}".format(error))
|
logging.error("Error: {}".format(error))
|
||||||
pass
|
pass
|
||||||
finally:
|
finally:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
@ -175,13 +173,15 @@ class Archive:
|
|||||||
for m in thread:
|
for m in thread:
|
||||||
try:
|
try:
|
||||||
|
|
||||||
|
logging.info(" - in - " + m['date'] + " " + m['subject'])
|
||||||
|
|
||||||
from_ = archive.util.format_from(m)
|
from_ = archive.util.format_from(m)
|
||||||
author_name_ = archive.util.format_author(m)
|
author_name_ = archive.util.format_author(m)
|
||||||
to_ = archive.util.format_to(m)
|
to_ = archive.util.format_to(m)
|
||||||
date_ = archive.util.format_date(m, self.archive_name)
|
date_ = archive.util.format_date(m, self.archive_name)
|
||||||
|
|
||||||
if date_ is None or from_ is None:
|
if date_ is None or from_ is None:
|
||||||
print("\nDATE ERROR: " + m['from'] + " - " + m['date'])
|
logging.warning("\nDATE ERROR: " + m['from'] + " - " + m['date'])
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
@ -197,10 +197,11 @@ class Archive:
|
|||||||
if error.errno == 1062:
|
if error.errno == 1062:
|
||||||
#duplication continue <------------------------- look this up...
|
#duplication continue <------------------------- look this up...
|
||||||
# print("\nError: {}".format(error))
|
# print("\nError: {}".format(error))
|
||||||
|
logging.info("+++db+++ duplicate")
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
print("\nError: {}".format(error))
|
logging.error("\nError: {}".format(error))
|
||||||
print(str_insert)
|
logging.error(str_insert)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return n_inserted
|
return n_inserted
|
||||||
@ -208,7 +209,7 @@ class Archive:
|
|||||||
def content_search(self, term, bool=True):
|
def content_search(self, term, bool=True):
|
||||||
|
|
||||||
if self.db_con is None:
|
if self.db_con is None:
|
||||||
print("Not connection to database...")
|
logging.warning("Not connection to database...")
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -226,14 +227,14 @@ class Archive:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
except mariadb.Error as error:
|
except mariadb.Error as error:
|
||||||
print("Error: {}".format(error))
|
logging.error("Error: {}".format(error))
|
||||||
finally:
|
finally:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
def from_search(self, term, bool=True):
|
def from_search(self, term, bool=True):
|
||||||
|
|
||||||
if self.db_con is None:
|
if self.db_con is None:
|
||||||
print("Not connection to database...")
|
logging.warning("Not connection to database...")
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -251,7 +252,7 @@ class Archive:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
except mariadb.Error as error:
|
except mariadb.Error as error:
|
||||||
print("Error: {}".format(error))
|
logging.error("Error: {}".format(error))
|
||||||
finally:
|
finally:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
|
|||||||
@ -12,6 +12,9 @@ CREATE = "CREATE TABLE `{}` (" \
|
|||||||
"FULLTEXT (`from_`, `author_name_`)" \
|
"FULLTEXT (`from_`, `author_name_`)" \
|
||||||
") ENGINE = InnoDB;"
|
") ENGINE = InnoDB;"
|
||||||
|
|
||||||
|
# FULLTEXT manual
|
||||||
|
# ALTER TABLE tableName ADD FULLTEXT(columnA, columnB);
|
||||||
|
|
||||||
INSERT = ("INSERT INTO {}"
|
INSERT = ("INSERT INTO {}"
|
||||||
"(from_, author_name_, to_, subject_, date_, content_type_, content_, url_) "
|
"(from_, author_name_, to_, subject_, date_, content_type_, content_, url_) "
|
||||||
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
|
"VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
|
||||||
|
|||||||
@ -215,7 +215,7 @@ def format_id(msg, archive_name):
|
|||||||
|
|
||||||
# format='%d/%m/%Y'
|
# format='%d/%m/%Y'
|
||||||
def min_date(archive_name):
|
def min_date(archive_name):
|
||||||
if "nettime" in archive_name:
|
if archive_name == "nettime_l":
|
||||||
return '01/10/1995'
|
return '01/10/1995'
|
||||||
elif archive_name == "spectre":
|
elif archive_name == "spectre":
|
||||||
return '01/08/2001'
|
return '01/08/2001'
|
||||||
@ -223,3 +223,8 @@ def min_date(archive_name):
|
|||||||
return '01/01/2002'
|
return '01/01/2002'
|
||||||
elif archive_name == "crumb":
|
elif archive_name == "crumb":
|
||||||
return '01/02/2001'
|
return '01/02/2001'
|
||||||
|
elif archive_name == "oldboys":
|
||||||
|
return '01/03/2001'
|
||||||
|
elif archive_name == "nettime_bold":
|
||||||
|
return '01/01/2000'
|
||||||
|
|
||||||
|
|||||||
26
index.py
26
index.py
@ -2,6 +2,7 @@ import os, logging, argparse
|
|||||||
from glob import glob
|
from glob import glob
|
||||||
import archive.archive as archive
|
import archive.archive as archive
|
||||||
import config
|
import config
|
||||||
|
import terminal.util as util
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
@ -10,25 +11,32 @@ def list_archives(archives_dir):
|
|||||||
|
|
||||||
def run(lists, archives):
    """Index each named archive into its database table.

    Args:
        lists: names of the archives to index.
        archives: archives directory, as a string (it is concatenated into
            the debug message and handed to ``archive.Archive``).

    For every name the archive is built from ``archives``. If the name is
    not among the tables reported by the database, the user is asked whether
    the table should be created; a negative answer stops the whole run.
    Otherwise the archive is inserted with the ``config.db`` settings.
    """
    logging.debug("indexing: " + str(lists) + " from " + archives)

    lists_db = archive.list_tables_db_config(config.db)
    if lists_db is None:
        # The table listing logs a database error and then yields no value;
        # the membership test below would raise TypeError on None.
        logging.error("Could not list database tables. Aborting.")
        return

    for a in lists:
        ar = archive.Archive(a, archives)
        if a not in lists_db:
            # presumably y_n_question returns a truthy value for "yes" -- confirm
            if util.y_n_question("Archive " + a + " db table does not exist. Create it?"):
                ar.create_db(config.db)
            else:
                logging.info("Table not created. Aborting.")
                return
        ar.insert_db(config.db)
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Command-line entry point: read the list names and the archives
    # directory, then pass them to run().
    parser = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
    parser.add_argument('list', metavar="list", help="list(s) to index", nargs="+")
    parser.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)

    options = parser.parse_args()

    # An empty --archives value falls back to the configured directory.
    archives_dir = options.archives if options.archives else config.archives

    # The single keyword "all" expands to every archive found in the directory.
    selected = options.list
    if selected == ["all"]:
        selected = list_archives(archives_dir)

    run(selected, archives_dir)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
def y_n_question(question_str):
|
def y_n_question(question_str):
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user