diff --git a/export/exportlist.py b/export/exportlist.py
new file mode 100644
index 0000000..e4aa3e4
--- /dev/null
+++ b/export/exportlist.py
@@ -0,0 +1,45 @@
+import os, logging, glob
+import xmltodict, json
+import config
+
+xml_dump = os.path.join(config.export['path'], config.export['xml'])
+
+def list_all(dirname=config.export['path'], extension="xml"):
+    """Return the sorted paths of the exported files found in dirname.
+
+    dirname   -- directory to scan (defaults to the configured export path)
+    extension -- file extension to match, without the leading dot
+
+    Returns None (after logging an error) if dirname is not a directory.
+    """
+    if not os.path.isdir(dirname):
+        logging.error(dirname + " is not a valid directory.")
+        return None
+
+    # glob order is arbitrary; sort for a stable listing
+    return sorted(glob.glob(os.path.join(dirname, "*." + extension)))
+
+def get(fn, extension="xml"):
+    """Parse one exported file and return its content as a dict.
+
+    fn        -- path of the file to read
+    extension -- format of the file; only "xml" is supported
+
+    Returns the xmltodict result (keyed by the root element name), or None
+    (after logging an error) if fn is not a file or the format is unsupported.
+    """
+    if not os.path.isfile(fn):
+        logging.error(fn + " is not a valid file.")
+        return None
+
+    if extension != "xml":
+        logging.error(extension + " is not a supported export format.")
+        return None
+
+    with open(fn) as fp:
+        dxml = fp.read()
+
+    # xmltodict nests the messages as chapter/mails/mail and turns a lone
+    # <mail> into a dict instead of a list; force a list so callers can
+    # always iterate over the messages
+    return xmltodict.parse(dxml, force_list=('mail',))
diff --git a/export/exportxml.py b/export/exportxml.py
new file mode 100644
index 0000000..c16a2b0
--- /dev/null
+++ b/export/exportxml.py
@@ -0,0 +1,229 @@
+import json, os, logging, glob
+import xml.etree.ElementTree as et
+import export.utils
+import config
+from datetime import datetime
+
+# running count of the <mail> elements emitted so far
+nn = 0
+
+sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
+xml_dump = os.path.join(config.export['path'], config.export['xml'])
+
+
+def export_generate_path(tag):
+    """Return the export path template for a tag.
+
+    The "[now]" placeholder is left in place; export_selection_tag()
+    substitutes a timestamp for it when the file is written.
+    """
+    return os.path.join(config.export['path'], tag + "_[now].xml")
+
+def _emit_single_mail(msg, xmlel):
+    """Append one <mail> element describing msg to xmlel (no follow-ups)."""
+
+    global nn
+    nn += 1
+
+    mail = et.SubElement(xmlel, 'mail')
+
+    subject = et.SubElement(mail, 'subject')
+    subject.text = export.utils.format_subject(msg['subject'])
+
+    # 'to' is the only header treated as optional
+    to = et.SubElement(mail, 'to')
+    to.text = msg['to'] if 'to' in msg else 'n/a'
+
+    from_ = et.SubElement(mail, 'from')
+    from_.text = msg['from']
+
+    date = et.SubElement(mail, 'date')
+    date.text = msg['date']
+
+    content = et.SubElement(mail, 'content')
+    content.text = export.utils.format_content(msg['content'])
+
+def _follow_up_key(tup):
+    """Sort key for (timestamp, msg) pairs; undated messages sort last."""
+    return (tup[0] is None, tup[0] or 0)
+
+def emit_mail_xml(msg, xmlel):
+    """Append msg and every message of its follow-up tree to xmlel.
+
+    msg   -- message dict ('subject', 'from', 'date', 'content', optional
+             'to' and 'follow-up')
+    xmlel -- parent element receiving the <mail> children
+
+    The follow-ups are flattened and emitted in chronological order.
+    """
+    _emit_single_mail(msg, xmlel)
+
+    # index_follow_up() already walks the whole reply tree, so each entry
+    # is emitted exactly once here; recursing into emit_mail_xml() again
+    # would write nested replies several times
+    if 'follow-up' in msg:
+        followups = export.utils.index_follow_up(msg)
+        followups.sort(key=_follow_up_key)
+        for _, f in followups:
+            _emit_single_mail(f, xmlel)
+
+
+#------------------------------------------------------------
+# The following functions parse the selection files
+#------------------------------------------------------------
+
+def export_single_tag(t, sel, fout):
+    """Write the <chapter> element of tag t to fout.
+
+    t    -- tag name, a key of sel
+    sel  -- selection dict; sel[t] provides 'desc' and 'lists'
+    fout -- writable text file object
+
+    Returns True on success, False (after logging) if the tag is unknown.
+    """
+    if t not in sel:
+        logging.error("Tag: " + str(t) + " does not exists.")
+        return False
+
+    ch = sel[t]
+
+    chapter = et.Element('chapter')
+    chapter_title = et.SubElement(chapter, 'title')
+    chapter_title.text = t
+
+    chapter_desc = et.SubElement(chapter, 'desc')
+    chapter_desc.text = ch['desc']
+
+    chapter_mails = et.SubElement(chapter, 'mails')
+
+    for m in ch['lists']:
+        emit_mail_xml(m, chapter_mails)
+
+    # et.tostring() returns bytes; decode before handing the text to fout
+    fout.write(export.utils.remove_invalid_xml_characters(et.tostring(chapter).decode('utf-8', 'ignore')))
+
+    return True
+
+def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
+    """Export every tag of the selection dump to xml_out.
+
+    Returns True on success, False as soon as one tag fails to export.
+    """
+    with open(sel_dump) as fin:
+        d = json.load(fin)
+
+    # NOTE(review): one <chapter> root is written per tag, so a selection
+    # with several tags yields a sequence of XML fragments rather than one
+    # well-formed document -- confirm this is what the readers expect
+    with open(xml_out, 'w') as fout:
+        for k in d:
+            if not export_single_tag(k, d, fout):
+                logging.error("Error exporting: " + k)
+                return False
+    return True
+
+def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
+    """Export a single tag of the selection dump to xml_out.
+
+    tag     -- tag name to export
+    xml_out -- destination path; a "[now]" placeholder is replaced by the
+               current timestamp, and an empty value falls back to
+               export_generate_path(tag)
+
+    Returns True on success, False (after logging) if the tag is unknown.
+    """
+    with open(sel_dump) as fin:
+        d = json.load(fin)
+
+    # check the tag first so that a bad request does not leave an empty file
+    if tag not in d:
+        logging.error("Error exporting: " + str(tag))
+        return False
+
+    if not xml_out:
+        xml_out = export_generate_path(tag)
+
+    # str.replace() returns a new string, it does not modify xml_out in place
+    now = datetime.now()
+    xml_out = xml_out.replace("[now]", now.strftime("%d-%m-%y_%H:%M:%S"))
+
+    with open(xml_out, 'w') as fout:
+        if not export_single_tag(tag, d, fout):
+            logging.error("Error exporting: " + tag)
+            return False
+    return True
+
+
+
+#------------------------------------------------------------
+# The following functions parse the archive files directly
+#------------------------------------------------------------
+
+def export_file(f, fout):
+    """Write all the threads of the archive file f to fout under an <all> root.
+
+    f    -- path of a JSON archive file with a 'threads' list
+    fout -- writable text file object
+    """
+    with open(f) as fp:
+        d = json.load(fp)
+
+    all_mail = et.Element('all')
+    for t in d['threads']:
+        emit_mail_xml(t, all_mail)
+
+    # same clean-up as the other writers in this module
+    fout.write(export.utils.remove_invalid_xml_characters(et.tostring(all_mail).decode('utf-8', 'ignore')))
+
+def parse_date_file(fname):
+    """Return the datetime encoded in an archive file name such as 'May_1999.json'."""
+    return datetime.strptime(fname, '%B_%Y.json')
+
+def export_year(d, dt, fout):
+    """Write a <chapter> with one <section> per month of the year of dt.
+
+    d    -- directory holding the '<Month>_<Year>.json' archive files
+    dt   -- datetime whose year selects the files to export
+    fout -- writable text file object
+    """
+    dir_files = glob.glob(os.path.join(d, "*.json"))
+
+    chapter = et.Element('chapter')
+    year = et.SubElement(chapter, 'year')
+    year.text = dt.strftime('%Y')
+
+    # SORT MONTHS BEFORE WRITING TO XML
+    dates = []
+    for f in dir_files:
+
+        try:
+            fdt = parse_date_file(os.path.basename(f))
+        except ValueError:
+            # not a '<Month>_<Year>.json' archive file
+            logging.warning("Skipping " + f)
+            continue
+
+        if dt.year != fdt.year:
+            continue
+
+        dates.append((fdt, f))
+
+    dates.sort(key=lambda tup: tup[0])
+
+    for fdt, f in dates:
+
+        logging.debug(f)
+
+        section = et.SubElement(chapter, 'section')
+        month = et.SubElement(section, 'month')
+        month.text = fdt.strftime('%B')
+
+        with open(f) as fp:
+            dj = json.load(fp)
+
+        mails = et.SubElement(section, 'mails')
+        for t in dj['threads']:
+            emit_mail_xml(t, mails)
+
+    # write utf8 to file (et.tostring are bytes)
+    fout.write(export.utils.remove_invalid_xml_characters(et.tostring(chapter).decode('utf-8', 'ignore')))
diff --git a/export/utils.py b/export/utils.py
new file mode 100644
index 0000000..1f9ddb4
--- /dev/null
+++ b/export/utils.py
@@ -0,0 +1,66 @@
+from datetime import datetime
+import regex as re
+import email.utils, logging
+
+# characters outside these ranges are stripped from the exported XML
+# (the XML 1.0 character ranges, without the supplementary planes)
+xml_re = re.compile('[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]')
+nl_re = re.compile('\n\n\n')
+ind_re = re.compile('--------------------------------------------------------------------------')
+
+def format_subject(s):
+    """Collapse every run of whitespace in s into a single space."""
+    return ' '.join(s.split())
+
+def format_content(c):
+    """Strip c, collapse triple newlines and remove the long dash separators."""
+
+    c = c.strip()
+
+    # new lines
+    c = re.sub(nl_re, '\n', c)
+
+    ## weird stuff
+    # 1. indesign automatic overset... (? dunno why ?)
+    # ex: Sat, 22 Nov 1997 18:23:59 -0500 (The Mattel Crackdown -- Nettime)
+    c = re.sub(ind_re, '', c)
+
+    return c
+
+def parse_date_msg(msg):
+    """Return the UTC timestamp of msg['date'], or None if it cannot be parsed."""
+    date_str = msg['date']
+    try:
+        # parsedate_tz() returns None for unparsable input, which makes
+        # mktime_tz() raise TypeError
+        date_tz = email.utils.parsedate_tz(date_str)
+        return email.utils.mktime_tz(date_tz)
+    except (TypeError, ValueError) as ex:
+        logging.warning("Format Date " + type(ex).__name__)
+        logging.warning(" > " + str(date_str))
+    except Exception:
+        # keep the best-effort contract: a bad date never aborts an export
+        logging.exception("Format Date unexpected error")
+    return None
+
+def index_follow_up(msg):
+    """Return (timestamp, message) pairs for the whole follow-up tree of msg.
+
+    The walk is recursive: replies to replies are included. A follow-up
+    whose date cannot be parsed gets the timestamp of its direct parent
+    (which may itself be None).
+    """
+    r = []
+    if 'follow-up' in msg:
+        for m in msg['follow-up']:
+            d = parse_date_msg(m)
+            if d is None:
+                d = parse_date_msg(msg) # same as parent
+            r.append((d, m))
+            r += index_follow_up(m)
+    return r
+
+# See for Nevejan's research
+def remove_invalid_xml_characters(s):
+    """Return s without the characters matched by xml_re."""
+    return re.sub(xml_re, '', s)
diff --git a/selection/sel.py b/selection/sel.py
index 5b22a53..cc0c527 100644
--- a/selection/sel.py
+++ b/selection/sel.py
@@ -2,6 +2,7 @@ import os, json, glob, logging
 from selection import strutil
 from threading import Lock
 import config
+import export.exportxml
 
 sel = os.path.join(config.selection['path'], config.selection['sel'])
 sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
@@ -133,6 +134,7 @@ def tags_w_lists():
             for m in v['lists']:
                 l += recursive_info(m, keep_hierachy=True)
             t['lists'] = l
+            t['export'] = export.exportxml.export_generate_path(k)
             tags.append(t)
     return tags
 
diff --git a/www/routes.py b/www/routes.py
index cbed55d..7626bf3 100644
--- a/www/routes.py
+++ b/www/routes.py
@@ -2,6 +2,7 @@ from flask import render_template, request, jsonify, send_from_directory
 from www import app
 import json, logging
 from selection import sel
+from export import exportxml, exportlist
 import urllib.parse
 import urllib.request
 import config
@@ -48,11 +49,38 @@ def tags_w_lists():
         if a == "dump":
             if sel.commit_from_selection():
                 return "done"
-        if a == "delete":
+        elif a == "delete":
             if sel.delete_url(data.get('tag'), data.get('url')):
                 return "ok"
+        elif a == "export":
+            pass
     return "-"
 
+@app.route('/export', methods = ['POST'])
+def export():
+    if request.method == 'POST':
+        data = request.form
+        a = data.get('action')
+        if a == "export":
+            # NOTE(review): 'exportpath' comes straight from the client and is
+            # opened for writing -- restrict it to the export directory
+            if exportxml.export_selection_tag(data.get('tagid'), xml_out=data.get('exportpath')):
+                return 'ok'
+        elif a == "export_all":
+            if exportxml.export_selection_all():
+                return 'ok'
+    return "-"
+
+@app.route('/ex/<path:fn>')
+def ex(fn):
+    # NOTE(review): fn comes from the URL and is opened as a file path
+    if fn == "all":
+        return render_template("ex_all.html", files=exportlist.list_all())
+    else:
+        e = exportlist.get(fn)
+        if e:
+            return render_template("chapter.html", chapter=e['chapter'])
+    return "Request for " + fn + " failed..."
 
 @app.route('/report')
 def report():
diff --git a/www/static/lestyle.css b/www/static/lestyle.css
index 71fa956..c85523d 100644
--- a/www/static/lestyle.css
+++ b/www/static/lestyle.css
@@ -40,3 +40,21 @@ bb {
 li button {
     margin-top: 0em;
 }
+
+.header {
+    font-weight: bold;
+}
+
+.payload {
+    white-space: pre-wrap;
+    font-family: monospace;
+    display: block;
+    unicode-bidi: embed;
+    border: 2px solid red;
+    word-wrap: break-word;
+    word-break: break-all;
+}
+
+.mail {
+    width: 35em;
+}
diff --git a/www/static/tags_w_lists.js b/www/static/tags_w_lists.js
index f7fefc4..410c5b7 100644
--- a/www/static/tags_w_lists.js
+++ b/www/static/tags_w_lists.js
@@ -26,4 +26,29 @@ $(document).ready(function(){
         });
     });
 
+    $('.export').click(function(e) {
+        var form = $(this).parent("form");
+        var g = form.serialize() + "&tagid=" + encodeURIComponent(form.data('tagid')) + "&action=export";
+        $('#status').text("Exporting - " + form.data('tagid'))
+        $.post('/export', g, function(d) {
+            if(d === 'ok') {
+                $('#status').text("Exporting - " + form.data('tagid') + " - success!!!")
+            } else {
+                $('#status').text("Exporting - " + form.data('tagid') + " - error...")
+            }
+        });
+    });
+
+    $('#export_all').click(function(a){
+        var g = "&action=export_all";
+        $('#status').text("Exporting - all")
+        $.post('/export', g, function(d) {
+            if(d === 'ok') {
+                $('#status').text("Exporting - all - success!!!")
+            } else {
+                $('#status').text("Exporting - all - error...")
+            }
+        });
+    });
+
 });
\ No newline at end of file
diff --git a/www/templates/chapter.html b/www/templates/chapter.html
new file mode 100644
index 0000000..852d3bc
--- /dev/null
+++ b/www/templates/chapter.html
@@ -0,0 +1,27 @@
+
+
+
+ Chapter: {{chapter.title}}
+
+
+
+

{{chapter.title}}

+

{{chapter.desc}}

+
+{% for m in chapter.mails.mail %} +
+
+
+
From: {{m.from}}
+
To: {{m.to}}
+
Date: {{m.date}}
+
Subject: {{m.subject}}
+
+
+ {{m.content}} +
+
+{% endfor %} +
+ + \ No newline at end of file diff --git a/www/templates/ex_all.html b/www/templates/ex_all.html new file mode 100644 index 0000000..68063ae --- /dev/null +++ b/www/templates/ex_all.html @@ -0,0 +1,16 @@ + + + + Exports [all] + + +

Exports [all] - [SEARCH, TAGS]

+
+ +
+ + \ No newline at end of file diff --git a/www/templates/selection_tags_w_lists.html b/www/templates/selection_tags_w_lists.html index 9978298..786cec6 100644 --- a/www/templates/selection_tags_w_lists.html +++ b/www/templates/selection_tags_w_lists.html @@ -7,11 +7,10 @@ -

Selection [tags w lists]

+

Selection [tags w lists] - [SEARCH, TAGS]


-

SEARCH

-

TAGS

- +
Ok
+
{% for v in tags %}
@@ -19,8 +18,10 @@
+ +