"""Export mailing-list archives stored as JSON files to XML.

Two modes are supported:

* without ``--year``: every positional argument is a single JSON file, which
  is exported to ``<file stem>.xml`` in the current directory;
* with ``--year``: every positional argument is a directory of
  ``<Month>_<Year>.json`` files; the months of the requested year are
  exported, in chronological order, to ``<directory name>_<year>.xml``.
"""

import argparse
import glob
import json
import os
import sys
import xml.etree.ElementTree as et
from datetime import datetime


def format_subject(s: str) -> str:
    """Collapse every run of whitespace in *s* into a single space."""
    return ' '.join(s.split())


def format_content(c: str) -> str:
    """Strip *c* and replace each triple newline with a single newline."""
    return c.strip().replace("\n\n\n", "\n")


def parse_date(fname: str) -> datetime:
    """Parse a file name of the form ``<Month>_<Year>.json``.

    Raises:
        ValueError: if *fname* does not match the ``%B_%Y.json`` pattern.
    """
    return datetime.strptime(fname, '%B_%Y.json')


def emit_mail_xml(msg: dict, xmlel: et.Element) -> None:
    """Append a ``<mail>`` element describing *msg* to *xmlel*.

    *msg* must provide the keys ``subject``, ``to``, ``from``, ``date`` and
    ``content``; ``follow-up`` is optional.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    mail = et.SubElement(xmlel, 'mail')
    et.SubElement(mail, 'subject').text = format_subject(msg['subject'])
    et.SubElement(mail, 'to').text = msg['to']
    et.SubElement(mail, 'from').text = msg['from']
    et.SubElement(mail, 'date').text = msg['date']
    et.SubElement(mail, 'content').text = format_content(msg['content'])

    # A message without a 'follow-up' key is treated like one without
    # follow-ups instead of aborting the whole export with a KeyError.
    if msg.get('follow-up'):
        # TODO: follow-ups are only flagged on stdout for now; they are not
        # yet exported recursively.
        print('follow-up')


def _load_json(path: str) -> dict:
    """Return the decoded JSON document stored at *path* (read as UTF-8)."""
    with open(path, encoding="utf-8") as fp:
        return json.load(fp)


def _emit_threads(path: str, parent: et.Element) -> None:
    """Append one ``<mail>`` per entry of ``threads`` in *path* to *parent*."""
    for thread in _load_json(path)['threads']:
        emit_mail_xml(thread, parent)


def export_file(f, fout):
    """Export the JSON archive *f* as a single ``<all>`` element.

    Args:
        f: path of the JSON file to export.
        fout: writable binary file object receiving the serialized XML.
    """
    all_mail = et.Element('all')
    _emit_threads(f, all_mail)
    fout.write(et.tostring(all_mail))


def export_year(d, dt, fout):
    """Export all monthly archives of one year found in directory *d*.

    Args:
        d: directory containing ``<Month>_<Year>.json`` files.
        dt: ``datetime`` whose ``year`` selects the archives to export.
        fout: writable binary file object receiving the serialized XML.

    Raises:
        ValueError: if a ``*.json`` file in *d* is not named
            ``<Month>_<Year>.json``.
    """
    chapter = et.Element('chapter')
    et.SubElement(chapter, 'year').text = dt.strftime('%Y')

    # Keep only the requested year and sort by month before writing, since
    # glob returns the files in arbitrary order.
    dated_files = []
    for path in glob.glob(os.path.join(d, "*.json")):
        file_date = parse_date(os.path.basename(path))
        if file_date.year == dt.year:
            dated_files.append((file_date, path))
    dated_files.sort(key=lambda pair: pair[0])

    for file_date, path in dated_files:
        print(file_date)
        print(path)
        section = et.SubElement(chapter, 'section')
        et.SubElement(section, 'month').text = file_date.strftime('%B')
        # The file is loaded before <mails> is created, as in the original
        # statement order.
        threads = _load_json(path)['threads']
        mails = et.SubElement(section, 'mails')
        for thread in threads:
            emit_mail_xml(thread, mails)

    fout.write(et.tostring(chapter))


def main(argv=None):
    """Parse the command line and export every requested file or directory.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    p = argparse.ArgumentParser(
        description='Mailinglists are dead. Long live mailinglists!')
    # nargs="+" makes argparse itself reject an empty file list.
    p.add_argument('file', metavar="f",
                   help="mailinglist file(s) or dir(s) to export", nargs="+")
    p.add_argument('--year', '-y', metavar='y', type=str,
                   help='year of archive')
    args = p.parse_args(argv)

    dt = None
    if args.year:
        try:
            dt = datetime.strptime(args.year, '%Y')
        except ValueError:
            p.error('invalid year: ' + repr(args.year))

    for f in args.file:
        if dt is not None:
            if not os.path.isdir(f):
                sys.exit(f + ' is not a valid directory. Aborting.')
            # abspath drops a trailing separator, so "archive/" still yields
            # "archive_<year>.xml" rather than "_<year>.xml".
            dirname = os.path.basename(os.path.abspath(f))
            foutname = dirname + "_" + dt.strftime('%Y') + ".xml"
            with open(foutname, "wb") as fout:
                export_year(f, dt, fout)
        else:
            if not os.path.isfile(f):
                sys.exit(f + ' is not a valid file. Aborting.')
            # One output per input file, mirroring the per-directory output
            # of the --year mode; et.tostring() yields bytes, hence "wb".
            stem = os.path.splitext(os.path.basename(f))[0]
            with open(stem + ".xml", "wb") as fout:
                export_file(f, fout)


if __name__ == "__main__":
    main()