import glob
import json
import logging
import os

import mysql.connector as mariadb
from lxml import etree as et

import db.sql
import db.utils


class DB:
    """Thin wrapper around one database connection for the mail archive.

    Usable as a context manager; the connection is closed on exit.

    NOTE(review): ``content_search`` and ``from_search`` read
    ``self.archive_name``, which is not assigned anywhere in this class.
    Presumably callers set it on the instance before searching -- confirm.
    """

    # Connection handle returned by db.utils.connect_db, or None when absent.
    db_con = None

    # Server error number treated as "row already exists" by insert_db.
    _DUPLICATE_ENTRY = 1062

    def __init__(self, config):
        """Open a connection using the 'database', 'host', 'user' and
        'password' entries of *config*."""
        self.db_con = self._connect(config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection, if any; exceptions are not suppressed."""
        if self.db_con is not None:
            self.db_con.close()
            # Drop the closed handle so later calls take the
            # "no connection" path instead of using a closed connection.
            self.db_con = None

    @staticmethod
    def _connect(config):
        """Return whatever db.utils.connect_db yields for *config*."""
        return db.utils.connect_db(
            config['database'],
            config['host'],
            config['user'],
            config['password'],
        )

    def _ensure_connection(self, config=None):
        """Connect lazily from *config* when no connection exists yet.

        Returns:
            True when a connection is available afterwards, else False.
        """
        if self.db_con is None and config is not None:
            self.db_con = self._connect(config)
        return self.db_con is not None

    @staticmethod
    def _child_text(element, tag):
        """Return the text of child *tag*, or None when the child is missing."""
        child = element.find(tag)
        return None if child is None else child.text

    @staticmethod
    def _describe(values):
        """Join *values* with ' - ' for log output; safe for None/non-strings."""
        return " - ".join(str(value) for value in values)

    def _fetch_rows(self, statement):
        """Execute *statement* on a buffered cursor and return all rows.

        Returns:
            A list of row tuples, or None when the database reports an error
            (the error is logged).
        """
        cursor = None
        try:
            cursor = self.db_con.cursor(buffered=True)
            cursor.execute(statement)
            return list(cursor)
        except mariadb.Error as error:
            logging.error("Error: {}".format(error))
            return None
        finally:
            # cursor stays None when cursor() itself failed.
            if cursor is not None:
                cursor.close()

    def create_db(self, config=None):
        """Execute the db.sql.CREATE statement.

        Args:
            config: optional connection settings, used only when no
                connection is open yet.
        """
        logging.info("creating table 'full_digest_rescheduled'")
        if not self._ensure_connection(config):
            logging.warning(" - no connection... Aborting.")
            return

        cursor = None
        try:
            cursor = self.db_con.cursor()
            cursor.execute(db.sql.CREATE)
        except mariadb.Error as error:
            logging.error("Error: {}".format(error))
        else:
            # Only report success when the statement actually went through.
            logging.info(" - done.")
        finally:
            if cursor is not None:
                cursor.close()

    def insert_db(self, xml_file, urls_index_file, config=None):
        """Insert every ``mails/mail`` element of *xml_file* into the database.

        Each mail's number is prefixed with the XML file's base name (up to
        the first dot) and looked up in the JSON index *urls_index_file* to
        obtain its url. Mails that have no number, are absent from the index
        or have no usable date are skipped with a warning. Rows rejected as
        duplicates, and rows failing with another database error, are logged
        and skipped. All inserts are committed together at the end.

        Args:
            xml_file: path of the XML file to import.
            urls_index_file: path of a JSON file mapping mail numbers to urls.
            config: optional connection settings, used only when no
                connection is open yet.

        Returns:
            None.
        """
        if not self._ensure_connection(config):
            logging.warning(" - no connection... Aborting.")
            return None

        if not os.path.isfile(xml_file):
            logging.error("%s is not a valid file.", xml_file)
            return None

        if not os.path.isfile(urls_index_file):
            logging.error("%s is not a valid file.", urls_index_file)
            return None

        with open(urls_index_file) as fp:
            urls = json.load(fp)

        ch = os.path.basename(xml_file).split('.')[0]
        root = et.parse(xml_file).getroot()

        logging.info("-----------------")
        logging.info(os.path.basename(xml_file))
        logging.info("-----------------")

        cursor = self.db_con.cursor()
        try:
            for m in root.findall('mails/mail'):
                nbr_str = self._child_text(m, 'nbr')
                to_str = self._child_text(m, 'to')
                date_str = self._child_text(m, 'date')
                from_str = self._child_text(m, 'from')
                subject_str = self._child_text(m, 'subject')
                content_str = self._child_text(m, 'content')

                if nbr_str is None:
                    logging.warning("mail without nbr... skipping.")
                    continue

                # format nbr
                nbr_str = ch + '.' + nbr_str

                if nbr_str not in urls:
                    logging.warning(nbr_str + " is not in urls... skipping.")
                    continue
                url = urls[nbr_str]

                date = None
                if date_str is not None:
                    date = db.utils.format_date(date_str)
                if date is None:
                    logging.warning(
                        "null date: %s - %s - %s", nbr_str, date_str, from_str
                    )
                    logging.warning("continuing...")
                    continue

                # The "n/a" recipient placeholder is replaced by a fixed
                # address before insertion.
                if to_str == "n/a":
                    to_str = "syndicate@aec.at"

                # (nbr_, author_name_, to_, subject_, date_, content_, url_)
                row = (nbr_str, from_str, to_str, subject_str, date,
                       content_str, url)
                try:
                    logging.info("inserting " + nbr_str)
                    cursor.execute(db.sql.INSERT, row)
                except mariadb.Error as error:
                    if error.errno == self._DUPLICATE_ENTRY:
                        logging.info("+++db+++ duplicate")
                    else:
                        logging.warning("Error...")
                        logging.warning("%s", error)
                        logging.warning(self._describe(row))
                    continue

                # execute() does not return the number of affected rows;
                # the cursor's rowcount does.
                if cursor.rowcount == 0:
                    logging.warning("error no insert...")
                    logging.warning(self._describe(row))

            self.db_con.commit()
        finally:
            cursor.close()
        return None

    def content_search(self, term, bool=True):
        """Run the content query for *term*.

        Args:
            term: search term; formatted into the query text together with
                ``self.archive_name``.
            bool: when true use db.sql.CONTENT_QUERY_BOOLEAN, otherwise
                db.sql.CONTENT_QUERY. (The name shadows the builtin but is
                part of the public signature.)

        Returns:
            A list of dicts with keys 'nbr', 'from', 'to', 'subject', 'date'
            and 'url'; None when there is no connection or the query fails.
        """
        if self.db_con is None:
            logging.warning("Not connection to database...")
            return None

        template = db.sql.CONTENT_QUERY_BOOLEAN if bool else db.sql.CONTENT_QUERY
        # NOTE(review): term is spliced into the SQL text with str.format.
        # If term can come from untrusted input this is an injection risk;
        # the query templates should take bound parameters instead.
        rows = self._fetch_rows(template.format(self.archive_name, term))
        if rows is None:
            return None

        # nbr_, author_name_, to_, subject_, date_, url_
        return [
            {'nbr': nbr_, 'from': author_name_, 'to': to_,
             'subject': subject_, 'date': date_, 'url': url_}
            for (nbr_, author_name_, to_, subject_, date_, url_) in rows
        ]

    def from_search(self, term, bool=True):
        """Run the sender query for *term*.

        Args:
            term: search term; formatted into the query text together with
                ``self.archive_name``.
            bool: when true use the FROM_QUERY_BOOLEAN template, otherwise
                FROM_QUERY. (The name shadows the builtin but is part of the
                public signature.)

        Returns:
            A list of (from, author_name, subject, date, url) tuples; None
            when there is no connection or the query fails.
        """
        if self.db_con is None:
            logging.warning("Not connection to database...")
            return None

        # The templates were previously looked up on an `archive` module
        # that this file never imports; db.sql is used here as in
        # content_search. NOTE(review): confirm db.sql defines
        # FROM_QUERY_BOOLEAN and FROM_QUERY.
        template = db.sql.FROM_QUERY_BOOLEAN if bool else db.sql.FROM_QUERY
        # NOTE(review): same str.format injection caveat as content_search.
        rows = self._fetch_rows(template.format(self.archive_name, term))
        if rows is None:
            return None

        return [
            (from_, author_name_, subject_, date_, url_)
            for (from_, author_name_, subject_, date_, url_) in rows
        ]