"""Export mailing-list selections and archive files as XML.

Two families of entry points live here:

* ``export_selection_*`` read a JSON selection dump (tag -> chapter) and
  write one ``<chapter>`` per tag as UTF-8 bytes.
* ``export_file`` / ``export_year`` read archive JSON files directly and
  write XML text to a caller-supplied stream.

NOTE(review): the literal XML wrapper tags in this module (``<mail>``,
``<mails>``, ``<chapter>``, ``<section>``, ``<all>``) were reconstructed from
the element names used by the earlier ElementTree implementation -- confirm
them against the consumers of the generated files.
"""

import glob
import json
import logging
import os
import re
import xml.etree.ElementTree as et
from datetime import datetime
from xml.sax.saxutils import unescape, escape

import config
import export.utils
from export import emailreply

# Running count of mails emitted by emit_mail_xml() since import.
nn = 0

# Default input (selection dump) and output (XML) locations from the config.
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
xml_dump = os.path.join(config.export['path'], config.export['xml'])


def export_generate_path(tag):
    """Return the export path template for *tag*.

    The file name keeps a literal ``[now]`` placeholder;
    ``export_selection_tag`` substitutes the current timestamp for it.
    """
    return os.path.join(config.export['path'], tag + "_[now].xml")


def make_xml_element(el, val):
    """Return ``<el>val</el>`` with *val* XML-escaped.

    Args:
        el: element name, used verbatim for the opening and closing tag.
        val: text content; must be a string (it is passed to ``escape``).
    """
    return "<" + el + ">" + escape(val) + "</" + el + ">"


def _strip_content(text):
    """Clean a reply body before it is serialised.

    NOTE(review): the original code applied a ``re.sub`` here whose pattern
    started with a negative lookbehind (``(?<!``), but the rest of the
    statement could not be recovered. The text is passed through unchanged
    until the original pattern is restored -- TODO confirm and reinstate.
    """
    return text


def _to_ascii_xml(text):
    """Replace every non-ASCII character in *text* by a numeric character
    reference, so the result can be written to a stream of any encoding."""
    return text.encode('ascii', 'xmlcharrefreplace').decode('ascii')


def emit_mail_xml(msg, li):
    """Serialise one message, and recursively its follow-ups, as XML.

    Args:
        msg: message dict. Reads ``subject``, ``from``, ``date`` and
            ``content``; ``to`` is optional (``n/a`` when absent);
            ``follow-up`` triggers the recursion.
        li: label written to the log for every emitted mail.

    Returns:
        A string with one ``<mail>`` element for *msg*, followed by one
        ``<mail>`` element per follow-up in sorted order.

    Raises:
        KeyError: if a required key is missing from *msg*.
    """
    global nn
    nn += 1

    # Lazy formatting: also works when the label is not a string.
    logging.info("export xml: %s", li)

    subject = make_xml_element("subject", msg['subject']) + "\n"
    to = make_xml_element("to", msg.get('to', 'n/a')) + "\n"
    from_ = make_xml_element("from", msg['from']) + "\n"
    date = make_xml_element("date", msg['date']) + "\n"

    # TODO: filter reply, unescape XML
    e = emailreply.EmailMessage(export.utils.format_content(msg['content']))
    e.read()

    content_stripped = _strip_content(e.reply)
    content = make_xml_element("content", content_stripped) + "\n"

    mail = "<mail>\n" + subject + from_ + to + date + content + "</mail>\n"

    # Recursive "follow-up": replies are appended after their parent, as
    # siblings rather than nested elements.
    if 'follow-up' in msg:
        followups = export.utils.index_follow_up(msg)
        followups.sort(key=lambda tup: tup[0])
        for _, follow_up in followups:
            mail += emit_mail_xml(follow_up, li)

    return mail


#------------------------------------------------------------
# The following functions parse the selection files
#------------------------------------------------------------

def export_single_tag(t, sel, fout):
    """Write the chapter stored under tag *t* to *fout*.

    Args:
        t: tag name to export.
        sel: selection mapping (tag -> chapter dict with ``desc`` and
            ``lists``).
        fout: binary stream; receives the chapter encoded as UTF-8.

    Returns:
        ``True`` on success, ``False`` (after logging an error) when *t* is
        not a key of *sel*.
    """
    if t not in sel:
        logging.error("Tag: " + t + " does not exists.")
        return False

    ch = sel[t]

    chapter_title = make_xml_element("title", t) + "\n"
    chapter_desc = make_xml_element("desc", ch['desc']) + "\n"

    chapter_mails = "<mails>\n"
    for m in ch['lists']:
        chapter_mails += emit_mail_xml(m, m['list'])
    chapter_mails += "</mails>\n"

    # Title and description used to be computed and then dropped; they are
    # now written, matching the layout of the ElementTree implementation
    # this code replaced.
    chapter = ("<chapter>\n" + chapter_title + chapter_desc
               + chapter_mails + "</chapter>")

    fout.write(chapter.encode('utf-8'))
    return True


def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
    """Export every tag of the selection dump into a single file.

    Args:
        sel_dump: path of the JSON selection dump.
        xml_out: path of the XML file to write (previously ignored in
            favour of the module-level default).

    Returns:
        ``True`` when all tags were written, ``False`` on the first failure.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    with open(xml_out, 'wb') as fout:
        for k in d:
            if not export_single_tag(k, d, fout):
                logging.error("Error exporting: " + k)
                return False

    return True


def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
    """Export a single tag of the selection dump.

    Args:
        tag: tag name to export.
        sel_dump: path of the JSON selection dump.
        xml_out: output path; every ``[now]`` in it is replaced by the
            current timestamp.

    Returns:
        ``True`` on success, ``False`` when the tag could not be exported.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    now = datetime.now()
    # str.replace returns a new string; the result must be kept.
    xml_out = xml_out.replace("[now]", now.strftime("%d-%m-%y_%H:%M:%S"))

    with open(xml_out, 'wb') as fout:
        if not export_single_tag(tag, d, fout):
            logging.error("Error exporting: " + tag)
            return False

    return True


#------------------------------------------------------------
# The following functions parse the archive files directly
#------------------------------------------------------------

def export_file(f, fout):
    """Write every thread of archive file *f* to *fout* inside ``<all>``.

    Args:
        f: path of a JSON archive file with a ``threads`` list.
        fout: text stream; receives ASCII-only XML text.
    """
    with open(f) as fp:
        d = json.load(fp)

    # emit_mail_xml returns XML text, so the document is assembled as a
    # string; the file path doubles as the log label.
    parts = ["<all>\n"]
    for t in d['threads']:
        parts.append(emit_mail_xml(t, str(f)))
    parts.append("</all>")

    fout.write(_to_ascii_xml("".join(parts)))


def parse_date_file(fname):
    """Parse an archive file name such as ``January_2015.json``.

    Raises:
        ValueError: if *fname* does not match ``%B_%Y.json``.
    """
    return datetime.strptime(fname, '%B_%Y.json')


def export_year(d, dt, fout):
    """Write all archive files of one year as a single ``<chapter>``.

    Args:
        d: directory containing ``<Month>_<Year>.json`` archive files.
        dt: datetime whose ``year`` selects the files to export.
        fout: text stream; receives ASCII-only XML text with invalid XML
            characters removed.

    Raises:
        ValueError: if a ``*.json`` file in *d* is not named
            ``<Month>_<Year>.json``.
    """
    # Sort the months chronologically before writing them out.
    dated_files = []
    for path in glob.glob(os.path.join(d, "*.json")):
        file_date = parse_date_file(os.path.basename(path))
        if file_date.year == dt.year:
            dated_files.append((file_date, path))
    dated_files.sort(key=lambda pair: pair[0])

    parts = ["<chapter>\n",
             make_xml_element("year", dt.strftime('%Y')) + "\n"]

    for file_date, path in dated_files:
        logging.debug(path)

        with open(path) as fp:
            dj = json.load(fp)

        parts.append("<section>\n")
        parts.append(make_xml_element("month", file_date.strftime('%B')) + "\n")
        parts.append("<mails>\n")
        for t in dj['threads']:
            parts.append(emit_mail_xml(t, path))
        parts.append("</mails>\n")
        parts.append("</section>\n")

    parts.append("</chapter>")

    xml = _to_ascii_xml("".join(parts))
    fout.write(export.utils.remove_invalid_xml_characters(xml))