This commit is contained in:
gauthiier 2022-07-31 17:16:51 +02:00
parent 9c4cdb72ad
commit 14128a5db5
4 changed files with 73 additions and 1 deletions

View File

@ -267,6 +267,29 @@ class Archive:
finally:
cursor.close()
def select_all(self):
    """Return every row of this archive's table, ordered by date.

    Runs ``archive.sql.SELECT_ALL`` formatted with ``self.archive_name``.

    Returns:
        A list of 8-tuples ``(from_, author_name_, to_, subject_, date_,
        content_type_, content_, url_)``, or ``None`` when there is no
        database connection or the query fails with ``mariadb.Error``
        (the problem is logged in both cases).

    Raises:
        ValueError: if a row does not have exactly eight columns.
    """
    if self.db_con is None:
        logging.warning("Not connection to database...")
        return None

    # Bind the name before the try block: if cursor() itself raises, the
    # finally clause must not fail on an unbound local and mask the error.
    cursor = None
    try:
        cursor = self.db_con.cursor(buffered=True)
        cursor.execute(archive.sql.SELECT_ALL.format(self.archive_name))
        # Unpacking each row keeps the implicit "exactly eight columns" check.
        return [
            (from_, author_name_, to_, subject_, date_, content_type_, content_, url_)
            for (from_, author_name_, to_, subject_, date_, content_type_, content_, url_) in cursor
        ]
    except mariadb.Error as error:
        logging.error("Error: {}".format(error))
        return None
    finally:
        if cursor is not None:
            cursor.close()
# analysis
def longest_field(self, field, thread, max_length=0):
import archive.util

View File

@ -33,4 +33,7 @@ FROM_QUERY_NL = ("SELECT from_, author_name_, subject_, date_, url_ FROM {} "
# Lists the tables of the current database.
SHOW_TABLE = "show tables"

# Fetches every column of every row of one table, sorted by date_ (ascending).
# The "{}" placeholder is filled with the table name via str.format().
SELECT_ALL = "SELECT * FROM {} ORDER BY date_"
# SELECT from_, author_name_, subject_, date_, url_ FROM nettime_l WHERE MATCH(content_) AGAINST('%s' IN BOOLEAN MODE)

46
export.py Normal file
View File

@ -0,0 +1,46 @@
import argparse
import email
import email.utils
import mailbox

import archive.archive as archive
import config
def row_to_message(row):
    """Build an ``mailbox.mboxMessage`` from one archive row.

    Args:
        row: an 8-tuple ``(from_, author_name_, to_, subject_, date_,
            content_type_, content_, url_)``. ``date_`` must be a
            ``datetime`` (or ``None``); ``url_`` is not exported.

    Returns:
        The message with To, Subject, From, Date and Content-Type headers
        and ``content_`` as payload. A header whose value is ``None`` is
        left out, because a ``None`` header value cannot be serialised.
    """
    (from_, author_name_, to_, subject_, date_, content_type_, content_, url_) = row

    m = mailbox.mboxMessage()
    if to_ is not None:
        m['To'] = to_
    if subject_ is not None:
        m['Subject'] = subject_
    if from_ is not None:
        # Addresses are stored obfuscated ("user{at}host"); restore the "@".
        addr = from_.replace('{at}', '@')
        if author_name_:
            m['From'] = email.utils.formataddr((author_name_, addr))
        else:
            m['From'] = addr
    if date_ is not None:
        m['Date'] = email.utils.format_datetime(date_)
    if content_type_ is not None:
        m['Content-Type'] = content_type_
    # NOTE(review): a str payload containing non-ASCII characters may fail
    # when mailbox serialises the message -- confirm against real data.
    m.set_payload(content_)
    return m


def main():
    """Export one archive, either to an mbox file or as a list of dates.

    With ``--out`` every row is appended to that mbox file; without it the
    date of every row is printed. Exits with an error message when the
    archive cannot be read.
    """
    p = argparse.ArgumentParser(description='Export mailinglists archives')
    p.add_argument('name', metavar="name", help="name of the archive to export")
    p.add_argument('--out', '-o', help="mbox output")
    args = p.parse_args()

    with archive.Archive(args.name, config=config.db) as a:
        rows = a.select_all()
        # select_all() returns None when there is no connection or the query
        # failed; stop here instead of failing later on "for ... in None".
        if rows is None:
            raise SystemExit("could not read archive '{}'".format(args.name))

        if args.out:
            mbox = mailbox.mbox(args.out)
            try:
                for row in rows:
                    mbox.add(row_to_message(row))
                mbox.flush()
            finally:
                # close() releases the underlying file even when add() fails.
                mbox.close()
        else:
            for (from_, author_name_, to_, subject_, date_, content_type_, content_, url_) in rows:
                print(date_)


if __name__ == "__main__":
    main()

View File

@ -28,7 +28,7 @@ def run(args):
for l in args.list:
if l not in all_lists:
logging.warning(l + "is not a valud list... continuing")
logging.warning(l + "is not a valid list... continuing")
continue
k_arg = args.keyword