117 lines
2.6 KiB
Python
117 lines
2.6 KiB
Python
import argparse, os, glob, sys, json
|
|
import xml.etree.ElementTree as et
|
|
from datetime import datetime
|
|
|
|
def format_subject(s):
    """Return the subject with every whitespace run collapsed to one space.

    ``str.split()`` without arguments splits on any whitespace (spaces,
    tabs, newlines) and drops empty fields, so leading and trailing
    whitespace disappears as well.
    """
    words = s.split()
    return ' '.join(words)
|
|
|
|
def format_content(c):
    """Return the mail body trimmed, with triple newlines reduced to one.

    Surrounding whitespace is stripped first; afterwards each
    non-overlapping occurrence of three consecutive newlines is replaced
    by a single newline (so four newlines in a row become two).
    """
    trimmed = c.strip()
    return trimmed.replace("\n\n\n", "\n")
|
|
|
|
def parse_date(fname):
    """Parse an archive file name such as ``March_2015.json`` into a datetime.

    The name must consist of a full month name, an underscore, a four-digit
    year and the ``.json`` suffix; ``datetime.strptime`` raises ``ValueError``
    for anything else. Day and time default to the first of the month at
    midnight.
    """
    filename_format = '%B_%Y.json'
    return datetime.strptime(fname, filename_format)
|
|
|
|
def emit_mail_xml(msg, xmlel):
    """Append one ``<mail>`` element describing ``msg`` to ``xmlel``.

    The new element gets the children ``subject``, ``to``, ``from``,
    ``date`` and ``content`` (in that order), each filled from the
    same-named key of ``msg``. The subject is passed through
    ``format_subject`` and the content through ``format_content``; the
    other fields are copied verbatim.

    Args:
        msg: Mapping with the keys ``subject``, ``to``, ``from``, ``date``,
            ``content`` and ``follow-up``; a missing key raises ``KeyError``.
        xmlel: Parent ElementTree element that receives the ``<mail>`` child.
    """
    mail = et.SubElement(xmlel, 'mail')

    # (tag, formatter) in output order; None means the value is used as-is.
    # NOTE: the content is not unescaped here, only trimmed by format_content.
    field_specs = (
        ('subject', format_subject),
        ('to', None),
        ('from', None),
        ('date', None),
        ('content', format_content),
    )
    for tag, formatter in field_specs:
        node = et.SubElement(mail, tag)
        value = msg[tag]
        node.text = value if formatter is None else formatter(value)

    if msg['follow-up']:
        # Follow-up messages are only reported on stdout for now; emitting
        # them recursively into the XML tree is not implemented yet.
        print('follow-up')
|
|
|
|
|
|
def export_file(f, fout):
    """Export all threads of one JSON archive file as a single ``<all>`` element.

    Args:
        f: Path of the JSON archive; its top-level object must have a
            ``threads`` entry that can be iterated, each item being passed
            to ``emit_mail_xml``.
        fout: Writable *binary* file object that receives the serialized XML.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's locale encoding, which breaks non-ASCII mail on some systems.
    with open(f, encoding='utf-8') as fp:
        archive = json.load(fp)

    all_mail = et.Element('all')
    for thread in archive['threads']:
        emit_mail_xml(thread, all_mail)

    # et.tostring() returns bytes by default, hence the binary-mode requirement.
    fout.write(et.tostring(all_mail))
|
|
|
|
def export_year(d, dt, fout):
    """Export one year of monthly JSON archives from a directory as a ``<chapter>``.

    Every ``*.json`` file directly inside ``d`` whose name parses as
    ``<Month>_<Year>.json`` and whose year equals ``dt.year`` becomes one
    ``<section>`` (with a ``<month>`` name and a ``<mails>`` list), ordered
    chronologically. The serialized chapter is written to ``fout``.

    Args:
        d: Directory containing the monthly archive files.
        dt: datetime whose year selects the archives to export.
        fout: Writable *binary* file object that receives the serialized XML.
    """
    chapter = et.Element('chapter')
    year = et.SubElement(chapter, 'year')
    year.text = dt.strftime('%Y')

    # Collect (month, path) pairs for the requested year so the months can be
    # sorted before writing; glob returns files in arbitrary order.
    dated_files = []
    for path in glob.glob(os.path.join(d, "*.json")):
        try:
            fdt = parse_date(os.path.basename(path))
        except ValueError:
            # A stray JSON file that is not a monthly archive must not abort
            # the whole export; report it and move on.
            print('Skipping ' + path + ': not a <Month>_<Year>.json archive',
                  file=sys.stderr)
            continue
        if fdt.year == dt.year:
            dated_files.append((fdt, path))

    dated_files.sort(key=lambda pair: pair[0])

    for month_dt, path in dated_files:
        # Progress output: the month being exported and its source file.
        print(month_dt)
        print(path)

        section = et.SubElement(chapter, 'section')
        month = et.SubElement(section, 'month')
        month.text = month_dt.strftime('%B')

        # JSON text is UTF-8; don't depend on the platform's locale encoding.
        with open(path, encoding='utf-8') as fp:
            archive = json.load(fp)

        mails = et.SubElement(section, 'mails')
        for thread in archive['threads']:
            emit_mail_xml(thread, mails)

    # et.tostring() returns bytes by default, hence the binary-mode requirement.
    fout.write(et.tostring(chapter))
|
|
|
|
def main(argv=None):
    """Command-line entry point: export mailing-list archives to XML files.

    Without ``--year`` every positional argument must be a JSON archive file;
    each one is exported with ``export_file`` to ``<input name>.xml``. With
    ``--year`` every positional argument must be a directory; each one is
    exported with ``export_year`` to ``<dir name>_<year>.xml``. Output files
    are created in the current working directory.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.

    Raises:
        SystemExit: On invalid arguments or when a path is not of the
            expected kind.
    """
    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
    p.add_argument('file', metavar="f", help="mailinglist file(s) or dir(s) to export", nargs="+")
    p.add_argument('--year', '-y', metavar='y', type=str, help='year of archive')

    args = p.parse_args(argv)
    # No explicit "no files" check is needed: nargs="+" already makes argparse
    # reject an empty file list.

    dt = None
    if args.year:
        try:
            dt = datetime.strptime(args.year, '%Y')
        except ValueError:
            # Report a bad year as a usage error instead of a traceback.
            p.error('invalid year: ' + args.year)

    for f in args.file:
        if dt is not None:
            if not os.path.isdir(f):
                sys.exit(f + ' is not a valid directory. Aborting.')
            # abspath drops a trailing slash, which would otherwise leave
            # basename() empty and produce a file called "_<year>.xml".
            dirname = os.path.basename(os.path.abspath(f))
            foutname = dirname + "_" + dt.strftime('%Y') + ".xml"
            with open(foutname, "wb") as fout:
                export_year(f, dt, fout)
        else:
            if not os.path.isfile(f):
                sys.exit(f + ' is not a valid file. Aborting.')
            # The exporters write bytes, so the output is opened in binary
            # mode; it is named after the input file.
            foutname = os.path.splitext(os.path.basename(f))[0] + ".xml"
            with open(foutname, "wb") as fout:
                export_file(f, fout)


if __name__ == "__main__":
    main()
|
|
|
|
|