"""Export mail selections and mail archive files to XML."""

import glob
import json
import logging
import os
import xml.etree.ElementTree as et
from datetime import datetime

import config
import export.utils

# Number of <mail> elements emitted by emit_mail_xml since import.
nn = 0

sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
xml_dump = os.path.join(config.export['path'], config.export['xml'])


def export_generate_path(tag):
    """Return the XML output path template for *tag*.

    The returned path ends in ``<tag>_[now].xml``; the literal ``[now]``
    placeholder is replaced by a timestamp in export_selection_tag().
    """
    return os.path.join(config.export['path'], tag + "_[now].xml")


def emit_mail_xml(msg, li, xmlel):
    """Append a <mail> element describing *msg* to *xmlel*.

    Args:
        msg: mapping with 'subject', 'from', 'date' and 'content' keys, and
            optionally 'to' and 'follow-up'.
        li: label (string) written to the log for this mail; callers in the
            selection export pass the mailing-list name.
        xmlel: parent ElementTree element that receives the <mail> element.

    Follow-ups are emitted recursively as siblings under the same *xmlel*
    (the output is flat, not nested). Raises KeyError if a required key is
    missing from *msg*.
    """
    global nn
    nn += 1

    logging.info("export xml: " + li)

    mail = et.SubElement(xmlel, 'mail')

    subject = et.SubElement(mail, 'subject')
    subject.text = export.utils.format_subject(msg['subject'])

    to = et.SubElement(mail, 'to')
    to.text = msg.get('to', 'n/a')

    from_ = et.SubElement(mail, 'from')
    from_.text = msg['from']

    date = et.SubElement(mail, 'date')
    date.text = msg['date']

    content = et.SubElement(mail, 'content')
    content.text = export.utils.format_content(msg['content'])

    # recursive "follow-up"
    if 'follow-up' in msg:
        # index_follow_up yields (sort key, message) pairs -- presumably the
        # key is the message date; verify against export.utils.
        followups = export.utils.index_follow_up(msg)
        # Sort on the key only so that ties never compare the messages.
        followups.sort(key=lambda tup: tup[0])
        for _, followup in followups:
            emit_mail_xml(followup, li, xmlel)


#------------------------------------------------------------
# The following functions parse the selection files
#------------------------------------------------------------

def export_single_tag(t, sel, fout):
    """Write the selection stored under tag *t* to *fout* as a <chapter>.

    Args:
        t: tag name to export.
        sel: loaded selection mapping; ``sel[t]`` must provide 'desc' and
            'lists', each entry of 'lists' being a mail with a 'list' key.
        fout: writable text file object.

    Returns:
        False (after logging an error) if *t* is not in *sel*, else True.
    """
    if t not in sel:
        logging.error("Tag: " + t + " does not exists.")
        return False

    ch = sel[t]

    chapter = et.Element('chapter')
    chapter_title = et.SubElement(chapter, 'title')
    chapter_title.text = t

    chapter_desc = et.SubElement(chapter, 'desc')
    chapter_desc.text = ch['desc']

    chapter_mails = et.SubElement(chapter, 'mails')

    for m in ch['lists']:
        emit_mail_xml(m, m['list'], chapter_mails)

    # et.tostring returns bytes; decode before writing to the text file.
    fout.write(export.utils.remove_invalid_xml_characters(
        et.tostring(chapter).decode('utf-8', 'ignore')))

    return True


def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
    """Export every tag of the selection file *sel_dump* into *xml_out*.

    Returns:
        False as soon as one tag fails to export (an error is logged),
        True when all tags were written.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    # Write to the requested output file, not the module-level default.
    with open(xml_out, 'w') as fout:
        for k in d:
            if not export_single_tag(k, d, fout):
                logging.error("Error exporting: " + k)
                return False

    return True


def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
    """Export the single tag *tag* of the selection file into *xml_out*.

    Any ``[now]`` placeholder in *xml_out* (see export_generate_path) is
    replaced by the current local time formatted as ``%d-%m-%y_%H:%M:%S``.

    Returns:
        False if the tag could not be exported (an error is logged),
        True otherwise.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    now = datetime.now()
    # str.replace returns a new string; keep the result.
    xml_out = xml_out.replace("[now]", now.strftime("%d-%m-%y_%H:%M:%S"))

    with open(xml_out, 'w') as fout:
        if not export_single_tag(tag, d, fout):
            logging.error("Error exporting: " + tag)
            return False

    return True


#------------------------------------------------------------
# The following functions parse the archive files directly
#------------------------------------------------------------

def export_file(f, fout):
    """Write every thread of the JSON archive file *f* to *fout* as XML.

    Args:
        f: path of a JSON file whose top-level object has a 'threads' list.
        fout: writable text file object; receives one <all> element.
    """
    with open(f) as fp:
        d = json.load(fp)

    # emit_mail_xml needs a log label; the archive threads carry no list
    # name that is visible here, so the archive file name is used instead.
    label = os.path.basename(f)

    all_mail = et.Element('all')
    for t in d['threads']:
        emit_mail_xml(t, label, all_mail)

    fout.write(et.tostring(all_mail).decode('utf-8', 'ignore'))


def parse_date_file(fname):
    """Parse an archive file name of the form ``<Month>_<Year>.json``.

    Returns a datetime built with the ``%B_%Y.json`` format; strptime raises
    ValueError when *fname* does not match.
    """
    return datetime.strptime(fname, '%B_%Y.json')


def export_year(d, dt, fout):
    """Write all archive files of one year to *fout* as a <chapter>.

    Args:
        d: directory containing the ``<Month>_<Year>.json`` archive files.
        dt: datetime whose year selects the files to export.
        fout: writable text file object.

    The chapter holds one <section> per month, in chronological order.
    Raises ValueError if a ``*.json`` file in *d* has a name that
    parse_date_file cannot parse.
    """
    archive_files = glob.glob(os.path.join(d, "*.json"))

    chapter = et.Element('chapter')
    year = et.SubElement(chapter, 'year')
    year.text = dt.strftime('%Y')

    # SORT MONTHS BEFORE WRITING TO XML
    dated_files = []
    for path in archive_files:
        file_date = parse_date_file(os.path.basename(path))
        if file_date.year == dt.year:
            dated_files.append((file_date, path))

    dated_files.sort(key=lambda tup: tup[0])

    for file_date, path in dated_files:
        logging.debug(path)

        section = et.SubElement(chapter, 'section')
        month = et.SubElement(section, 'month')
        month.text = file_date.strftime('%B')

        with open(path) as fp:
            dj = json.load(fp)

        # emit_mail_xml needs a log label; use the archive file name.
        label = os.path.basename(path)

        mails = et.SubElement(section, 'mails')
        for t in dj['threads']:
            emit_mail_xml(t, label, mails)

    # write utf8 to file (et.tostring are bytes)
    fout.write(export.utils.remove_invalid_xml_characters(
        et.tostring(chapter).decode('utf-8', 'ignore')))