List_server_busy/export/exportxml.py
2019-12-28 15:58:48 +01:00

288 lines
6.9 KiB
Python

import glob
import json, os, logging
import re
import xml.etree.ElementTree as et
from datetime import datetime
from xml.sax.saxutils import unescape, escape

import export.utils
import config
from export import emailreply
# Running count of emit_mail_xml calls (incremented once per serialized message).
nn = 0
# Default path of the JSON selection dump read by the export_selection_* functions.
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
# Default path of the XML file used as output by the export_selection_* functions.
xml_dump = os.path.join(config.export['path'], config.export['xml'])
def export_generate_path(tag):
    """Return the export path template for *tag*.

    The result is ``<config.export['path']>/<tag>_[now].xml``.  ``[now]`` is a
    literal placeholder, not a timestamp: no date is substituted here.
    export_selection_tag in this module targets the same ``[now]`` marker
    when it prepares its output path.

    Args:
        tag: Tag name used as the file-name prefix (must be a ``str``).

    Returns:
        str: The path template, still containing ``[now]``.
    """
    return os.path.join(config.export['path'], tag + "_[now].xml")
def make_xml_element(el, val):
    """Return ``<el>val</el>`` with *val* XML-escaped.

    Args:
        el: Element name; inserted verbatim in the opening and closing tag.
        val: Text content; ``&``, ``<`` and ``>`` are escaped.

    Returns:
        str: The serialized element, without a trailing newline.
    """
    opening = "<" + el + ">"
    closing = "</" + el + ">"
    return "".join((opening, escape(val), closing))
def emit_mail_xml(msg, li):
    """Serialize a message, and recursively its follow-ups, as ``<mail>`` XML.

    Each call increments the module-level counter ``nn`` and logs one line.

    Args:
        msg: Mapping with 'subject', 'from', 'date' and 'content' entries.
            'to' is optional ('n/a' is emitted when absent) and 'follow-up'
            is optional.
        li: String appended to the log line; export_single_tag passes the
            message's 'list' value.

    Returns:
        str: A ``<mail>`` element for *msg* followed by the ``<mail>``
        elements of its follow-ups as siblings (not nested), in the order
        given by the first item of each indexed follow-up pair.
    """
    global nn
    nn += 1
    logging.info("export xml: " + li)

    subject_xml = make_xml_element("subject", msg['subject']) + "\n"
    recipient = msg['to'] if 'to' in msg else 'n/a'
    to_xml = make_xml_element("to", recipient) + "\n"
    from_xml = make_xml_element("from", msg['from']) + "\n"
    date_xml = make_xml_element("date", msg['date']) + "\n"

    # TODO:
    #   - filter reply
    #   - unescape XML
    parsed = emailreply.EmailMessage(export.utils.format_content(msg['content']))
    parsed.read()
    # Replace a newline that has no newline directly before or after it with
    # a space; runs of two or more newlines are left as they are.
    flattened = re.sub(r'(?<!\n)\n(?!\n)', ' ', parsed.reply)
    content_xml = make_xml_element("content", flattened) + "\n"

    pieces = [
        "<mail>\n",
        subject_xml,
        from_xml,
        to_xml,
        date_xml,
        content_xml,
        "</mail>\n",
    ]

    # Recursive "follow-up" handling: append each follow-up after this mail.
    if 'follow-up' in msg:
        followups = export.utils.index_follow_up(msg)
        followups.sort(key=lambda entry: entry[0])
        for _, followup in followups:
            pieces.append(emit_mail_xml(followup, li))

    return "".join(pieces)
# def emit_mail_xml(msg, li, xmlel):
# global nn
# nn += 1
# logging.info("export xml: " + li)
# mail = et.SubElement(xmlel, 'mail')
# subject = et.SubElement(mail, 'subject')
# subject.text = export.utils.format_subject(msg['subject'])
# to = et.SubElement(mail, 'to')
# if 'to' in msg:
# to.text = msg['to']
# else:
# to.text = 'n/a'
# from_ = et.SubElement(mail, 'from')
# from_.text = msg['from']
# date = et.SubElement(mail, 'date')
# date.text = msg['date']
# '''
# todo:
# - filter reply
# - unescape XML
# '''
# e = emailreply.EmailMessage(export.utils.format_content(msg['content']))
# e.read()
# escape_table = {
# "&": "&amp;",
# ">": "&gt;",
# "<": "&lt;"
# }
# content_str = "<content>" + escape(e.reply, escape_table) + "</content>"
# print(content_str)
# content = et.fromstring(content_str)
# mail.append(content)
# # content = et.SubElement(mail, 'content')
# # content.text = e.reply
# # recursive "follow-up"
# if 'follow-up' in msg:
# followups = export.utils.index_follow_up(msg)
# followups.sort(key=lambda tup: tup[0])
# for d, f in followups:
# emit_mail_xml(f, li, xmlel)
#------------------------------------------------------------
# The following functions parse the selection files
#------------------------------------------------------------
def export_single_tag(t, sel, fout):
    """Write the ``<chapter>`` XML for one selection tag to *fout*.

    The chapter contains a ``<title>`` (the tag), a ``<desc>`` and a
    ``<mails>`` element holding every message of the tag.  Previously the
    title and description were built but never written.

    Args:
        t: Tag name; key into *sel* and text of the chapter title.
        sel: Mapping of tag name to a chapter mapping with 'desc' and
            'lists' entries.  Each item of 'lists' is a message mapping
            that carries a 'list' key (used as the log label).
        fout: Writable binary file object; receives UTF-8 encoded bytes.

    Returns:
        bool: ``False`` (after logging an error) when *t* is not in *sel*,
        ``True`` once the chapter has been written.
    """
    if t not in sel:
        logging.error("Tag: " + t + " does not exists.")
        return False

    ch = sel[t]

    parts = [
        "<chapter>\n",
        make_xml_element("title", t) + "\n",
        make_xml_element("desc", ch['desc']) + "\n",
        "<mails>\n",
    ]
    parts.extend(emit_mail_xml(m, m['list']) for m in ch['lists'])
    parts.append("</mails>\n")
    parts.append("</chapter>")

    # Encode once at the I/O boundary: fout is opened in binary mode by callers.
    fout.write("".join(parts).encode('utf-8'))
    return True
# def export_single_tag(t, sel, fout):
# if t not in list(sel.keys()):
# logging.error("Tag: " + t + " does not exists.")
# return False
# ch = sel[t]
# chapter = et.Element('chapter')
# chapter_title = et.SubElement(chapter, 'title')
# chapter_title.text = t
# chapter_desc = et.SubElement(chapter, 'desc')
# chapter_desc.text = ch['desc']
# chapter_mails = et.SubElement(chapter, 'mails')
# for m in ch['lists']:
# emit_mail_xml(m, m['list'], chapter_mails)
# # root = et.ElementTree(chapter)
# # root.write(fout, encoding="utf-8", xml_declaration=True)
# # xml = export.utils.remove_invalid_xml_characters(et.tostring(chapter).decode('unicode-escape', 'ignore'))
# xml = export.utils.remove_invalid_xml_characters(et.tostring(chapter, encoding="utf-8").decode('utf-8', 'ignore'))
# xml = '<?xml version="1.0" encoding="UTF-8"?>' + xml
# print(et.tostring(chapter, encoding="utf-8").decode('utf-8', 'ignore'))
# fout.write(et.tostring(chapter).decode('utf-8', 'ignore'))
# return True
def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
    """Export every tag of a selection dump into one XML file.

    Args:
        sel_dump: Path of the JSON selection file (tag -> chapter mapping).
        xml_out: Path of the output file.  It is now honoured; previously
            the module-level ``xml_dump`` path was always opened regardless
            of this argument.

    Returns:
        bool: ``True`` when every tag was exported, ``False`` as soon as one
        export_single_tag call fails (remaining tags are not processed).
    """
    # NOTE(review): chapters are written back-to-back with no enclosing root
    # element, so the file holds an XML fragment, not a standalone document.
    with open(sel_dump) as fin:
        d = json.load(fin)

    with open(xml_out, 'wb') as fout:
        for k in d:
            if not export_single_tag(k, d, fout):
                logging.error("Error exporting: " + k)
                return False
    return True
def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
    """Export a single tag of a selection dump to an XML file.

    Args:
        tag: Name of the tag to export.
        sel_dump: Path of the JSON selection file (tag -> chapter mapping).
        xml_out: Output path.  Every ``[now]`` marker in it is replaced by
            the current local time formatted as ``%d-%m-%y_%H:%M:%S``.

    Returns:
        bool: ``True`` on success, ``False`` when export_single_tag fails.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    # str.replace returns a new string; the result must be rebound, otherwise
    # the file is created with the literal "[now]" in its name.
    # NOTE(review): the timestamp contains ':' which is not a valid file-name
    # character on Windows — confirm target platforms.
    timestamp = datetime.now().strftime("%d-%m-%y_%H:%M:%S")
    xml_out = xml_out.replace("[now]", timestamp)

    with open(xml_out, 'wb') as fout:
        if not export_single_tag(tag, d, fout):
            logging.error("Error exporting: " + tag)
            return False
    return True
#------------------------------------------------------------
# The following functions parse the archive files directly
#------------------------------------------------------------
def export_file(f, fout):
    """Export every thread of one archive file as an ``<all>`` XML element.

    The previous body called ``emit_mail_xml(t, all_mail)`` with an
    ElementTree element as second argument.  emit_mail_xml takes a string
    label there and returns the XML text, so that call raised ``TypeError``
    and its result was dropped.  The returned strings are now collected.

    Args:
        f: Path of a JSON archive file with a top-level 'threads' list.
        fout: Writable text file object; receives the XML as ``str``.
    """
    with open(f) as fp:
        d = json.load(fp)

    # The archive path serves as the log label for emit_mail_xml.
    # NOTE(review): confirm whether threads carry a better label (e.g. a
    # list name) that should be logged instead.
    label = str(f)

    parts = ["<all>\n"]
    parts.extend(emit_mail_xml(t, label) for t in d['threads'])
    parts.append("</all>")
    fout.write("".join(parts))
def parse_date_file(fname):
    """Parse an archive file name of the form ``<MonthName>_<Year>.json``.

    Args:
        fname: Base file name, e.g. ``"January_2019.json"``.  The month is
            matched with ``%B`` (full month name in the current locale).

    Returns:
        datetime: The first day of that month at midnight.

    Raises:
        ValueError: If *fname* does not match the expected pattern.
    """
    archive_name_format = '%B_%Y.json'
    parsed = datetime.strptime(fname, archive_name_format)
    return parsed
def export_year(d, dt, fout):
    """Export all monthly archives of one year as a ``<chapter>`` element.

    The chapter holds a ``<year>`` element followed by one ``<section>`` per
    monthly archive (in chronological order), each with a ``<month>`` and a
    ``<mails>`` element.

    The previous body called ``emit_mail_xml(t, mails)`` with an ElementTree
    element as second argument.  emit_mail_xml takes a string label there
    and returns the XML text, so that call raised ``TypeError`` and its
    result was dropped.  The XML is now assembled from the returned strings.

    Args:
        d: Directory containing ``<MonthName>_<Year>.json`` archive files,
            each with a top-level 'threads' list.
        dt: Date/datetime whose ``year`` selects the archives to export.
        fout: Writable text file object; receives the XML as ``str``.

    Raises:
        ValueError: If a ``*.json`` file in *d* does not match the name
            pattern expected by parse_date_file.
    """
    # Pair each archive of the requested year with its parsed date so the
    # months can be sorted chronologically before writing.
    dated_files = []
    for path in glob.glob(os.path.join(d, "*.json")):
        file_date = parse_date_file(os.path.basename(path))
        if file_date.year != dt.year:
            continue
        dated_files.append((file_date, path))
    dated_files.sort(key=lambda pair: pair[0])

    parts = [
        "<chapter>\n",
        make_xml_element("year", dt.strftime('%Y')) + "\n",
    ]
    for file_date, path in dated_files:
        logging.debug(path)
        with open(path) as fp:
            archive = json.load(fp)

        parts.append("<section>\n")
        parts.append(make_xml_element("month", file_date.strftime('%B')) + "\n")
        parts.append("<mails>\n")
        # The archive path serves as the log label for emit_mail_xml.
        parts.extend(emit_mail_xml(t, path) for t in archive['threads'])
        parts.append("</mails>\n")
        parts.append("</section>\n")
    parts.append("</chapter>")

    fout.write(export.utils.remove_invalid_xml_characters("".join(parts)))