This commit is contained in:
gauthiier 2019-12-26 18:12:49 +01:00
parent 9c8f5b0e5e
commit 0b7564d44b
10 changed files with 366 additions and 5 deletions

24
export/exportlist.py Normal file
View File

@ -0,0 +1,24 @@
import os, logging, glob
import xmltodict, json
import config
# Path of the XML dump file, built from the export settings in config.
xml_dump = os.path.join(config.export['path'], config.export['xml'])
def list_all(dirname=config.export['path'], extension="xml"):
    """Return the paths of the files in `dirname` named "*.<extension>".

    Logs an error and returns None when `dirname` is not a directory.
    """
    if os.path.isdir(dirname):
        pattern = os.path.join(dirname, "*." + extension)
        return list(glob.glob(pattern))
    logging.error(dirname + " is not a valid directory.")
    return None
def get(fn, extension="xml"):
    """Load an exported file and return its parsed content.

    Args:
        fn: path of the file to load.
        extension: format of the file; only "xml" is supported.

    Returns:
        The mapping produced by xmltodict for the file, or None when `fn`
        is not a file or `extension` is not supported (an error is logged
        in both cases).
    """
    if not os.path.isfile(fn):
        logging.error(fn + " is not a valid file.")
        return None
    if extension != "xml":
        logging.error("Unsupported export extension: " + extension)
        return None
    with open(fn) as fp:
        dxml = fp.read()
    # xmltodict maps a lone child element to a dict but repeated children to
    # a list, so mails/mail would change shape with the number of mails.
    # Forcing a list keeps `for m in chapter.mails.mail` valid in both cases.
    return xmltodict.parse(dxml, force_list=('mail',))

161
export/exportxml.py Normal file
View File

@ -0,0 +1,161 @@
import glob
import json
import logging
import os
import xml.etree.ElementTree as et
from datetime import datetime

import config
import export.utils
# Running count of <mail> elements emitted (incremented in emit_mail_xml).
nn = 0
# Default selection dump; the export_selection_* functions read it as JSON.
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
# Default XML output file for exports.
xml_dump = os.path.join(config.export['path'], config.export['xml'])
def export_generate_path(tag):
    """Return the export path template for `tag`.

    The returned path keeps the literal "[now]" placeholder; it is meant to
    be substituted with a timestamp when the export is actually written
    (see export_selection_tag), so no time is computed here.
    """
    return os.path.join(config.export['path'], tag + "_[now].xml")
def _emit_single_mail(msg, xmlel):
    """Append one <mail> element for `msg` to `xmlel`, ignoring follow-ups."""
    global nn
    nn += 1

    mail = et.SubElement(xmlel, 'mail')
    et.SubElement(mail, 'subject').text = export.utils.format_subject(msg['subject'])
    # 'to' is optional in the archive; fall back to a visible marker.
    et.SubElement(mail, 'to').text = msg['to'] if 'to' in msg else 'n/a'
    et.SubElement(mail, 'from').text = msg['from']
    et.SubElement(mail, 'date').text = msg['date']
    et.SubElement(mail, 'content').text = export.utils.format_content(msg['content'])


def emit_mail_xml(msg, xmlel):
    """Append `msg` and all of its follow-ups to `xmlel` as <mail> elements.

    Args:
        msg: message mapping with 'subject', 'from', 'date', 'content' and
            optionally 'to' and 'follow-up'.
        xmlel: parent ElementTree element receiving the <mail> children.

    The follow-ups are emitted after `msg`, flattened and ordered by date.
    """
    _emit_single_mail(msg, xmlel)

    if 'follow-up' in msg:
        # index_follow_up already walks the whole reply tree and returns every
        # descendant, so each one is emitted exactly once here. Recursing into
        # emit_mail_xml again would duplicate replies nested two or more deep.
        followups = export.utils.index_follow_up(msg)
        # Dates may be None when unparseable; sort those first rather than
        # letting the None/int comparison raise TypeError.
        followups.sort(key=lambda tup: (tup[0] is not None, tup[0] or 0))
        for _, f in followups:
            _emit_single_mail(f, xmlel)
#------------------------------------------------------------
# The following functions parse the selection files
#------------------------------------------------------------
def export_single_tag(t, sel, fout):
    """Write the selection stored under tag `t` to `fout` as a <chapter>.

    Args:
        t: tag name to export.
        sel: selection mapping (tag -> entry with 'desc' and 'lists').
        fout: writable text file object.

    Returns:
        True on success, False (after logging) when `t` is not in `sel`.
    """
    if t not in sel:
        logging.error("Tag: " + t + " does not exists.")
        return False

    entry = sel[t]

    root = et.Element('chapter')
    et.SubElement(root, 'title').text = t
    et.SubElement(root, 'desc').text = entry['desc']

    mails_el = et.SubElement(root, 'mails')
    for message in entry['lists']:
        emit_mail_xml(message, mails_el)

    # et.tostring returns bytes; decode before sanitising and writing.
    serialized = et.tostring(root).decode('utf-8', 'ignore')
    fout.write(export.utils.remove_invalid_xml_characters(serialized))
    return True
def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
    """Export every tag of the selection dump to `xml_out`.

    Args:
        sel_dump: path of the JSON selection dump to read.
        xml_out: path of the XML file to write.

    Returns:
        True when every tag was exported, False as soon as one fails.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    # Write to the requested output file, not unconditionally to the
    # module-level default.
    # NOTE(review): each tag is written as its own top-level <chapter>, so a
    # dump holding several tags has several root elements -- confirm that the
    # consumers of this file expect that.
    with open(xml_out, 'w') as fout:
        for k in d:
            if not export_single_tag(k, d, fout):
                logging.error("Error exporting: " + k)
                return False
    return True
def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
    """Export a single tag of the selection dump to `xml_out`.

    Args:
        tag: tag name to export.
        sel_dump: path of the JSON selection dump to read.
        xml_out: path of the XML file to write; every "[now]" placeholder in
            it is replaced by the current time (dd-mm-yy_HH:MM:SS).

    Returns:
        True on success, False when the tag could not be exported.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    now = datetime.now()
    # str.replace returns a new string; the result has to be kept, otherwise
    # the placeholder ends up verbatim in the file name.
    xml_out = xml_out.replace("[now]", now.strftime("%d-%m-%y_%H:%M:%S"))

    with open(xml_out, 'w') as fout:
        if not export_single_tag(tag, d, fout):
            logging.error("Error exporting: " + tag)
            return False
    return True
#------------------------------------------------------------
# The following functions parse the archive files directly
#------------------------------------------------------------
def export_file(f, fout):
    """Export every thread of the JSON archive file `f` to `fout` as XML.

    Args:
        f: path of a JSON archive file holding a 'threads' list.
        fout: writable text file object receiving one <all> element.
    """
    with open(f) as fp:
        d = json.load(fp)

    all_mail = et.Element('all')
    for t in d['threads']:
        emit_mail_xml(t, all_mail)

    # et.tostring returns bytes; decode, then strip characters that are not
    # allowed in XML, as the other export functions of this module do.
    xml_text = et.tostring(all_mail).decode('utf-8', 'ignore')
    fout.write(export.utils.remove_invalid_xml_characters(xml_text))
def parse_date_file(fname):
    """Parse an archive file name such as "January_2019.json" into a datetime.

    Raises ValueError when `fname` does not follow the "<Month>_<Year>.json"
    pattern. Month names are matched by strptime's %B directive.
    """
    archive_name_format = '%B_%Y.json'
    parsed = datetime.strptime(fname, archive_name_format)
    return parsed
def export_year(d, dt, fout):
    """Export all archive files of directory `d` that belong to the year of `dt`.

    Args:
        d: directory holding "<Month>_<Year>.json" archive files.
        dt: datetime whose year selects the files to export.
        fout: writable text file object receiving one <chapter> element.

    The chapter holds one <section> per month, in chronological order.
    """
    json_paths = list(glob.glob(os.path.join(d, "*.json")))

    chapter = et.Element('chapter')
    et.SubElement(chapter, 'year').text = dt.strftime('%Y')

    # Keep only the files of the requested year, then order them by month.
    dated_paths = []
    for path in json_paths:
        file_date = parse_date_file(os.path.basename(path))
        if file_date.year == dt.year:
            dated_paths.append((file_date, path))
    dated_paths.sort(key=lambda pair: pair[0])

    for file_date, path in dated_paths:
        logging.debug(path)

        section = et.SubElement(chapter, 'section')
        et.SubElement(section, 'month').text = file_date.strftime('%B')

        with open(path) as fp:
            archive = json.load(fp)

        mails = et.SubElement(section, 'mails')
        for thread in archive['threads']:
            emit_mail_xml(thread, mails)

    # et.tostring returns bytes; decode to text before sanitising and writing.
    xml_text = et.tostring(chapter).decode('utf-8', 'ignore')
    fout.write(export.utils.remove_invalid_xml_characters(xml_text))

62
export/utils.py Normal file
View File

@ -0,0 +1,62 @@
from datetime import datetime
import regex as re
import email.utils, logging
# Matches any character outside the XML 1.0 "Char" production:
#   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
# The supplementary-plane range is included so that valid characters such as
# emoji are not stripped from exported mails.
xml_re = re.compile('[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]')
# Three consecutive line breaks (collapsed to one by format_content).
nl_re = re.compile('\n\n\n')
# Long dash ruler found in some mail bodies (removed by format_content).
ind_re = re.compile('--------------------------------------------------------------------------')
def format_subject(s):
    """Return `s` with every run of whitespace collapsed to a single space.

    Leading and trailing whitespace is dropped as well.
    """
    words = s.split()
    return ' '.join(words)
def format_content(c):
    """Return the mail body `c` cleaned up for export.

    The text is stripped, each run of three line breaks is replaced by one,
    and the long dash rulers matched by `ind_re` are removed.
    """
    stripped = c.strip()
    # new lines
    collapsed = nl_re.sub('\n', stripped)
    # Dash rulers trigger InDesign's automatic overset (reason unknown), e.g.
    # Sat, 22 Nov 1997 18:23:59 -0500 (The Mattel Crackdown -- Nettime)
    return ind_re.sub('', collapsed)
def parse_date_msg(msg):
    """Return the UTC timestamp of `msg['date']`, or None when unparseable.

    Args:
        msg: message mapping with a 'date' entry holding an RFC 2822 style
            date string.

    Returns:
        Seconds since the epoch (UTC) as computed by email.utils.mktime_tz,
        or None (after logging a warning) when the date cannot be parsed.
    """
    date_str = msg['date']
    try:
        # parsedate_tz returns None for dates it cannot parse, which makes
        # mktime_tz raise TypeError; out-of-range dates raise ValueError or
        # OverflowError, and non-string input raises AttributeError.
        date_tz = email.utils.parsedate_tz(date_str)
        return email.utils.mktime_tz(date_tz)
    except (TypeError, ValueError, OverflowError, AttributeError) as ex:
        logging.warning("Format Date " + type(ex).__name__)
        logging.warning(" > %s", date_str)
        return None
def index_follow_up(msg):
    """Return every follow-up of `msg`, at any depth, as (timestamp, message).

    The timestamp comes from parse_date_msg on the follow-up itself; when
    that fails, the date of its direct parent is used instead. Entries are
    listed depth-first, each follow-up immediately followed by its own
    follow-ups. A message without 'follow-up' yields an empty list.
    """
    if 'follow-up' not in msg:
        return []

    indexed = []
    for reply in msg['follow-up']:
        stamp = parse_date_msg(reply)
        if stamp is None:
            stamp = parse_date_msg(msg)  # same as parent
        indexed.append((stamp, reply))
        indexed.extend(index_follow_up(reply))
    return indexed
def remove_invalid_xml_characters(s):
    """Return `s` without the characters matched by `xml_re`.

    Original note: "See for Nevejan's research" -- presumably a mail that
    contained such characters; TODO confirm.
    """
    return xml_re.sub('', s)

View File

@ -2,6 +2,7 @@ import os, json, glob, logging
from selection import strutil from selection import strutil
from threading import Lock from threading import Lock
import config import config
import export.exportxml
sel = os.path.join(config.selection['path'], config.selection['sel']) sel = os.path.join(config.selection['path'], config.selection['sel'])
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump']) sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
@ -133,6 +134,7 @@ def tags_w_lists():
for m in v['lists']: for m in v['lists']:
l += recursive_info(m, keep_hierachy=True) l += recursive_info(m, keep_hierachy=True)
t['lists'] = l t['lists'] = l
t['export'] = export.exportxml.export_generate_path(k)
tags.append(t) tags.append(t)
return tags return tags

View File

@ -2,6 +2,7 @@ from flask import render_template, request, jsonify, send_from_directory
from www import app from www import app
import json, logging import json, logging
from selection import sel from selection import sel
from export import exportxml, exportlist
import urllib.parse import urllib.parse
import urllib.request import urllib.request
import config import config
@ -48,11 +49,35 @@ def tags_w_lists():
if a == "dump": if a == "dump":
if sel.commit_from_selection(): if sel.commit_from_selection():
return "done" return "done"
if a == "delete": elif a == "delete":
if sel.delete_url(data.get('tag'), data.get('url')): if sel.delete_url(data.get('tag'), data.get('url')):
return "ok" return "ok"
elif a == "export":
pass
return "-" return "-"
@app.route('/export', methods = ['POST'])
def export():
    """Handle export requests posted by the tags page.

    Form fields:
        action: "export" (single tag) or "export_all".
        tagid, exportpath: tag to export and output path; both are required
            when action is "export".

    Returns "ok" on success and "-" otherwise.
    """
    # The route only accepts POST, so no method check is needed here.
    data = request.form
    a = data.get('action')
    if a == "export":
        tagid = data.get('tagid')
        exportpath = data.get('exportpath')
        # Reject incomplete requests instead of failing on None further down.
        if not tagid or not exportpath:
            logging.error("Export request is missing 'tagid' or 'exportpath'.")
            return "-"
        # NOTE(review): exportpath comes straight from the client and is
        # opened for writing as-is -- consider restricting it to the
        # configured export directory.
        if exportxml.export_selection_tag(tagid, xml_out=exportpath):
            return 'ok'
    elif a == "export_all":
        if exportxml.export_selection_all():
            return 'ok'
    return "-"
@app.route('/ex/<path:fn>')
def ex(fn):
    """Render the list of exports ("all") or a single exported chapter.

    Args:
        fn: "all", or the path of an exported XML file.

    Returns the rendered page, or a plain failure message when the file
    cannot be loaded or holds no <chapter> root.
    """
    if fn == "all":
        # list_all returns None for an invalid directory; render an empty
        # list instead of failing inside the template loop.
        return render_template("ex_all.html", files=exportlist.list_all() or [])

    # NOTE(review): fn is taken from the URL and handed to the file system
    # unchanged -- consider restricting it to the export directory.
    e = exportlist.get(fn)
    if e and 'chapter' in e:
        return render_template("chapter.html", chapter=e['chapter'])
    return "Request for " + fn + " failed..."
@app.route('/report') @app.route('/report')
def report(): def report():

View File

@ -40,3 +40,21 @@ bb {
li button { li button {
margin-top: 0em; margin-top: 0em;
} }
/* Mail header fields (From / To / Date / Subject) of an exported chapter. */
.header {
    font-weight: bold;
}

/* Mail body: monospace, original line breaks kept, long tokens wrapped. */
.payload {
    display: block;
    border: 2px solid red;
    font-family: monospace;
    unicode-bidi: embed;
    white-space: pre-wrap;
    word-wrap: break-word;
    word-break: break-all;
}

/* One exported mail; fixed width keeps the text column readable. */
.mail {
    width: 35em;
}

View File

@ -26,4 +26,29 @@ $(document).ready(function(){
}); });
}); });
// Export a single tag when the "export" submit button of its form is clicked.
// The selector targets the submit button only: the exportpath text input
// also carries the "export" class and must not start an export when it is
// clicked for editing.
// NOTE(review): the default form submission is not prevented here -- confirm
// whether that is handled elsewhere.
$('input[type="submit"].export').click(function(e) {
    var form = $(this).parent("form");
    var tagid = form.data('tagid');
    // Encode the tag id so that characters such as "&" or spaces survive in
    // the hand-built query string.
    var g = form.serialize() + "&tagid=" + encodeURIComponent(tagid) + "&action=export";
    $('#status').text("Exporting - " + tagid)
    $.post('/export', g, function(d) {
        if(d === 'ok') {
            $('#status').text("Exporting - " + tagid + " - success!!!")
        } else {
            $('#status').text("Exporting - " + tagid + " - error...")
        }
    });
});
// Export every tag at once and report the outcome in the status line.
$('#export_all').click(function(a){
    var status = $('#status');
    var payload = "&action=export_all";
    status.text("Exporting - all");
    $.post('/export', payload, function(reply) {
        var outcome = (reply === 'ok') ? "Exporting - all - success!!!" : "Exporting - all - error...";
        status.text(outcome);
    });
});
}); });

View File

@ -0,0 +1,27 @@
{# Renders one exported chapter: title, description and every mail it holds. #}
<html>
<head>
    <meta charset="utf-8">
    <title>Chapter: {{chapter.title}}</title>
    <link rel="stylesheet" href="{{ url_for('static',filename='lestyle.css') }}">
</head>
<body>
    <h1>{{chapter.title}}</h1>
    <h2>{{chapter.desc}}</h2>
    <div id="all">
        {% for m in chapter.mails.mail %}
        <div class="mail">
            <hr>
            <div class="header">
                <div class="field">From: {{m.from}}</div>
                <div class="field">To: {{m.to}}</div>
                <div class="field">Date: {{m.date}}</div>
                <div class="field">Subject: {{m.subject}}</div>
            </div>
            {# .payload uses white-space: pre-wrap, so the content stays on the same line as the tags to keep template whitespace out of the mail body. #}
            <div class="payload">{{m.content}}</div>
        </div>
        {% endfor %}
    </div>
</body>
</html>

16
www/templates/ex_all.html Normal file
View File

@ -0,0 +1,16 @@
{# Lists every exported file with a link to its rendered view. #}
<html>
<head>
    <meta charset="utf-8">
    <title>Exports [all]</title>
</head>
<body>
    <h1>Exports [all] - [<a href="/search" target="_blank">SEARCH</a>, <a href="/tags" target="_blank">TAGS</a>]</h1>
    <div id="all">
        <ul>
            {% for f in files %}
            {# File names may hold spaces, ":" or brackets, so the path is URL-encoded for the href. #}
            <li><a href="/ex/{{ f|urlencode }}">{{f}}</a></li>
            {% endfor %}
        </ul>
    </div>
</body>
</html>

View File

@ -7,11 +7,10 @@
<script type="text/javascript" src="{{ url_for('static',filename='tags_w_lists.js') }}"></script> <script type="text/javascript" src="{{ url_for('static',filename='tags_w_lists.js') }}"></script>
</head> </head>
<body> <body>
<h1>Selection [tags w lists]</h1> <h1>Selection [tags w lists] - [<a href="/search" target="_blank">SEARCH</a>, <a href="/tags" target="_blank">TAGS</a>]</h1>
<hr> <hr>
<h2><a href="/search" target="_blank">SEARCH</a></h2> <div id="status">Ok</div>
<h2><a href="/tags" target="_blank">TAGS</a></h2> <button id="export_all">export all</button>
<!-- <button id="commit">commit</button> -->
<div id="all"> <div id="all">
{% for v in tags %} {% for v in tags %}
<hr> <hr>
@ -19,8 +18,10 @@
<form class="tag_item" id="{{v.tag}}" method="post" data-tagid="{{v.tag}}"> <form class="tag_item" id="{{v.tag}}" method="post" data-tagid="{{v.tag}}">
<input name="tag" class="tagedit" type="text" value="{{v.tag}}"> <input name="tag" class="tagedit" type="text" value="{{v.tag}}">
<textarea name="desc">{{v.desc}}</textarea> <textarea name="desc">{{v.desc}}</textarea>
<input name="exportpath" class="tagedit export" type="text" value="{{v.export}}">
<input type="submit" class="update" value="update"> <input type="submit" class="update" value="update">
<input type="submit" class="delete" value="delete"> <input type="submit" class="delete" value="delete">
<input type="submit" class="export" value="export">
</form> </form>
<lists> <lists>
<ul> <ul>