export
This commit is contained in:
parent
9c8f5b0e5e
commit
0b7564d44b
24
export/exportlist.py
Normal file
24
export/exportlist.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import os, logging, glob
|
||||||
|
import xmltodict, json
|
||||||
|
import config
|
||||||
|
|
||||||
|
# Path built by joining the configured export directory (config.export['path'])
# with the configured XML file name (config.export['xml']).
xml_dump = os.path.join(config.export['path'], config.export['xml'])
|
||||||
|
|
||||||
|
def list_all(dirname=config.export['path'], extension="xml"):
    """Return the paths of every ``*.<extension>`` file directly inside *dirname*.

    dirname   -- directory to scan (defaults to the configured export path)
    extension -- file extension to match, without the leading dot

    Logs an error and returns None when *dirname* is not a directory.
    """
    if os.path.isdir(dirname):
        pattern = os.path.join(dirname, "*." + extension)
        return list(glob.glob(pattern))

    logging.error(dirname + " is not a valid directory.")
    return None
|
||||||
|
|
||||||
|
def get(fn, extension="xml"):
    """Load an exported file and return its parsed content.

    fn        -- path of the file to read
    extension -- kind of file; only "xml" is supported

    Returns the dictionary produced by ``xmltodict.parse`` for XML files.
    Returns None (after logging an error) when *fn* is not a file, and None
    for any extension other than "xml".
    """
    if not os.path.isfile(fn):
        logging.error(fn + " is not a valid file.")
        return None

    if extension != "xml":
        # Nothing else is understood yet; keep returning None as before.
        return None

    with open(fn) as fp:
        dxml = fp.read()

    # xmltodict turns a single <mail> child into a dict but several into a
    # list, which makes "mails/mail" change shape depending on the number of
    # mails. Forcing a list keeps iteration over the mails uniform.
    return xmltodict.parse(dxml, force_list=('mail',))
|
||||||
161
export/exportxml.py
Normal file
161
export/exportxml.py
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
import glob
import json, os
import logging
import xml.etree.ElementTree as et
from datetime import datetime

import config
import export.utils
|
||||||
|
|
||||||
|
# Module-wide counter of mails written so far; emit_mail_xml increments it for each mail it emits.
nn = 0
|
||||||
|
|
||||||
|
# Path of the selection dump file; default `sel_dump` argument of the export_selection_* functions, which read it as JSON.
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
|
||||||
|
# Default XML output path; default `xml_out` argument of the export_selection_* functions.
xml_dump = os.path.join(config.export['path'], config.export['xml'])
|
||||||
|
|
||||||
|
|
||||||
|
def export_generate_path(tag):
    """Return the export path template for *tag*.

    The result is ``<export dir>/<tag>_[now].xml``. ``[now]`` is a literal
    placeholder, not a timestamp: it is left in the returned string so it can
    be substituted later, when the export is actually written.
    """
    return os.path.join(config.export['path'], tag + "_[now].xml")
|
||||||
|
|
||||||
|
def _emit_single_mail(msg, xmlel):
    """Append one <mail> element describing *msg* to *xmlel* (follow-ups are not visited)."""
    global nn
    nn += 1

    mail = et.SubElement(xmlel, 'mail')

    subject = et.SubElement(mail, 'subject')
    subject.text = export.utils.format_subject(msg['subject'])

    to = et.SubElement(mail, 'to')
    to.text = msg['to'] if 'to' in msg else 'n/a'

    from_ = et.SubElement(mail, 'from')
    from_.text = msg['from']

    date = et.SubElement(mail, 'date')
    date.text = msg['date']

    content = et.SubElement(mail, 'content')
    content.text = export.utils.format_content(msg['content'])


def emit_mail_xml(msg, xmlel):
    """Append *msg* and all of its follow-ups to *xmlel* as flat <mail> elements.

    msg   -- message dictionary with 'subject', 'from', 'date' and 'content'
             keys, optionally 'to' and 'follow-up'
    xmlel -- ElementTree element that receives the <mail> children

    The message itself is written first, followed by every message of its
    follow-up tree ordered by date. The module counter ``nn`` is incremented
    once per mail written.
    """
    _emit_single_mail(msg, xmlel)

    if 'follow-up' in msg:
        # index_follow_up already walks the whole follow-up tree, so each
        # entry must be written exactly once. Recursing here again would emit
        # deeper replies several times.
        followups = export.utils.index_follow_up(msg)
        # Entries whose date could not be parsed carry None; sort them last
        # instead of failing on a None/number comparison.
        followups.sort(key=lambda tup: (tup[0] is None, tup[0] or 0))
        for _, followup in followups:
            _emit_single_mail(followup, xmlel)
|
||||||
|
|
||||||
|
|
||||||
|
#------------------------------------------------------------
|
||||||
|
# The following functions parse the selection files
|
||||||
|
#------------------------------------------------------------
|
||||||
|
|
||||||
|
def export_single_tag(t, sel, fout):
    """Write the selection stored under tag *t* to *fout* as one <chapter> element.

    t    -- tag name (key of *sel*)
    sel  -- selection dictionary; ``sel[t]`` must provide 'desc' and 'lists'
    fout -- writable text file object

    The chapter holds a <title> (the tag), a <desc> and a <mails> element
    filled by emit_mail_xml. Returns True on success; logs an error and
    returns False when the tag is not part of the selection.
    """
    if t not in sel:
        logging.error("Tag: " + t + " does not exists.")
        return False

    ch = sel[t]

    chapter = et.Element('chapter')
    et.SubElement(chapter, 'title').text = t
    et.SubElement(chapter, 'desc').text = ch['desc']

    chapter_mails = et.SubElement(chapter, 'mails')
    for m in ch['lists']:
        emit_mail_xml(m, chapter_mails)

    # et.tostring returns bytes; decode before stripping characters that are
    # not allowed in XML and writing to the text file.
    xml_text = et.tostring(chapter).decode('utf-8', 'ignore')
    fout.write(export.utils.remove_invalid_xml_characters(xml_text))

    return True
|
||||||
|
|
||||||
|
def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
    """Export every tag of the selection dump into a single file.

    sel_dump -- path of the JSON selection dump to read
    xml_out  -- path of the file to write

    The chapters are written one after the other, so the output holds one
    top-level <chapter> element per tag. Returns True when all tags were
    exported; logs an error and returns False at the first tag that fails.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    # Write to the path given by the caller; the module-level default is only
    # used when no xml_out argument is supplied.
    with open(xml_out, 'w') as fout:
        for k in d:
            if not export_single_tag(k, d, fout):
                logging.error("Error exporting: " + k)
                return False

    return True
|
||||||
|
|
||||||
|
def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
    """Export the selection stored under *tag* to its own XML file.

    tag      -- tag name to export
    sel_dump -- path of the JSON selection dump to read
    xml_out  -- output path; every ``[now]`` placeholder in it is replaced by
                the current time formatted as ``%d-%m-%y_%H:%M:%S``. None
                falls back to the module default.

    Returns True on success; logs an error and returns False when the tag
    could not be exported.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    if xml_out is None:
        xml_out = xml_dump

    # str.replace returns a new string; the result has to be kept, otherwise
    # the file is created with the literal "[now]" in its name.
    stamp = datetime.now().strftime("%d-%m-%y_%H:%M:%S")
    xml_out = xml_out.replace("[now]", stamp)

    with open(xml_out, 'w') as fout:
        if not export_single_tag(tag, d, fout):
            logging.error("Error exporting: " + tag)
            return False

    return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#------------------------------------------------------------
|
||||||
|
# The following functions parse the archive files directly
|
||||||
|
#------------------------------------------------------------
|
||||||
|
|
||||||
|
def export_file(f, fout):
    """Export all threads of one archive file to *fout* inside an <all> element.

    f    -- path of a JSON archive file with a 'threads' list
    fout -- writable text file object
    """
    with open(f) as fp:
        d = json.load(fp)

    all_mail = et.Element('all')
    for t in d['threads']:
        emit_mail_xml(t, all_mail)

    # et.tostring returns bytes; decode, then drop characters that are not
    # allowed in XML, as the other export functions of this module do.
    xml_text = et.tostring(all_mail).decode('utf-8', 'ignore')
    fout.write(export.utils.remove_invalid_xml_characters(xml_text))
|
||||||
|
|
||||||
|
def parse_date_file(fname):
    """Parse an archive file name of the form ``<Month>_<Year>.json`` into a datetime.

    Raises ValueError when *fname* does not match that pattern.
    """
    name_format = '%B_%Y.json'
    parsed = datetime.strptime(fname, name_format)
    return parsed
|
||||||
|
|
||||||
|
def export_year(d, dt, fout):
    """Export one year of archive files to *fout* as a <chapter> element.

    d    -- directory holding the ``<Month>_<Year>.json`` archive files
    dt   -- datetime whose year selects the files to export
    fout -- writable text file object

    The chapter holds a <year> element followed by one <section> per month,
    in chronological order; each section has a <month> name and a <mails>
    element filled by emit_mail_xml.
    """
    # Collect the files of the requested year, then order them by month
    # before anything is written to the XML tree.
    pattern = os.path.join(d, "*.json")
    monthly = []
    for path in glob.glob(pattern):
        file_date = parse_date_file(os.path.basename(path))
        if file_date.year == dt.year:
            monthly.append((file_date, path))
    monthly.sort(key=lambda pair: pair[0])

    chapter = et.Element('chapter')
    et.SubElement(chapter, 'year').text = dt.strftime('%Y')

    for file_date, path in monthly:
        logging.debug(path)

        section = et.SubElement(chapter, 'section')
        et.SubElement(section, 'month').text = file_date.strftime('%B')

        with open(path) as fp:
            archive = json.load(fp)

        mails = et.SubElement(section, 'mails')
        for thread in archive['threads']:
            emit_mail_xml(thread, mails)

    # et.tostring returns bytes; decode to text before cleaning and writing.
    xml_text = et.tostring(chapter).decode('utf-8', 'ignore')
    fout.write(export.utils.remove_invalid_xml_characters(xml_text))
|
||||||
62
export/utils.py
Normal file
62
export/utils.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
import regex as re
|
||||||
|
import email.utils, logging
|
||||||
|
|
||||||
|
# Matches every character outside the XML 1.0 "Char" production:
# tab, LF, CR, U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF are allowed.
xml_re = re.compile('[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]')
# Run of three consecutive newlines (collapsed to one by format_content).
nl_re = re.compile('\n\n\n')
# Long dashed separator line (removed by format_content).
ind_re = re.compile('--------------------------------------------------------------------------')
|
||||||
|
|
||||||
|
def format_subject(s):
    """Collapse every run of whitespace in *s* to one space and trim both ends."""
    words = s.split()
    return ' '.join(words)
|
||||||
|
|
||||||
|
def format_content(c):
    """Clean up a mail body before it is written to the export.

    Strips surrounding whitespace, replaces each run of three newlines with a
    single newline and removes the long dashed separator lines.
    """
    body = c.strip()

    # new lines
    body = nl_re.sub('\n', body)

    # Odd case noted by the original author: the dashed line triggers an
    # automatic overset in InDesign (reason unknown).
    # ex: Sat, 22 Nov 1997 18:23:59 -0500 (The Mattel Crackdown -- Nettime)
    body = ind_re.sub('', body)

    return body
|
||||||
|
|
||||||
|
# returns utc timestamp
|
||||||
|
def parse_date_msg(msg):
    """Return the UTC timestamp of ``msg['date']``, or None when it cannot be parsed.

    msg -- message dictionary with a 'date' entry in e-mail (RFC 2822) format

    Parsing problems are logged and never propagate to the caller; a missing
    'date' key still raises KeyError.
    """
    date_str = msg['date']
    try:
        date_tz = email.utils.parsedate_tz(date_str)
        # parsedate_tz returns None for unparsable input, which makes
        # mktime_tz raise TypeError (handled below).
        return email.utils.mktime_tz(date_tz)  # utc timestamp
    except TypeError:
        logging.warning("Format Date TypeError")
        logging.warning(" > %s", date_str)
    except ValueError:
        logging.warning("Format Date ValueError")
        logging.warning(" > %s", date_str)
    except Exception:
        # Best effort: a date that cannot be handled must not abort an export.
        logging.exception("Format Date unexpected error")
    return None
|
||||||
|
|
||||||
|
# recursive
|
||||||
|
def index_follow_up(msg):
    """Return a flat list of ``(timestamp, message)`` pairs for the follow-up tree of *msg*.

    Every follow-up is listed directly before its own follow-ups (the function
    recurses). A follow-up whose date cannot be parsed gets the timestamp of
    its parent *msg* instead.
    """
    if 'follow-up' not in msg:
        return []

    indexed = []
    for reply in msg['follow-up']:
        stamp = parse_date_msg(reply)
        if stamp is None:
            stamp = parse_date_msg(msg)  # same as parent
        indexed.append((stamp, reply))
        indexed.extend(index_follow_up(reply))
    return indexed
|
||||||
|
|
||||||
|
# See for Nevejan?s research
|
||||||
|
def remove_invalid_xml_characters(s):
    """Return *s* with every character matched by ``xml_re`` removed."""
    cleaned = xml_re.sub('', s)
    return cleaned
|
||||||
|
# return re.sub(r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD]', '', s)
|
||||||
|
# return re.sub(r'\p{Cc}-[\r\n\t]+', '', s)
|
||||||
@ -2,6 +2,7 @@ import os, json, glob, logging
|
|||||||
from selection import strutil
|
from selection import strutil
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
import config
|
import config
|
||||||
|
import export.exportxml
|
||||||
|
|
||||||
sel = os.path.join(config.selection['path'], config.selection['sel'])
|
sel = os.path.join(config.selection['path'], config.selection['sel'])
|
||||||
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
|
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
|
||||||
@ -133,6 +134,7 @@ def tags_w_lists():
|
|||||||
for m in v['lists']:
|
for m in v['lists']:
|
||||||
l += recursive_info(m, keep_hierachy=True)
|
l += recursive_info(m, keep_hierachy=True)
|
||||||
t['lists'] = l
|
t['lists'] = l
|
||||||
|
t['export'] = export.exportxml.export_generate_path(k)
|
||||||
tags.append(t)
|
tags.append(t)
|
||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,7 @@ from flask import render_template, request, jsonify, send_from_directory
|
|||||||
from www import app
|
from www import app
|
||||||
import json, logging
|
import json, logging
|
||||||
from selection import sel
|
from selection import sel
|
||||||
|
from export import exportxml, exportlist
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import config
|
import config
|
||||||
@ -48,11 +49,35 @@ def tags_w_lists():
|
|||||||
if a == "dump":
|
if a == "dump":
|
||||||
if sel.commit_from_selection():
|
if sel.commit_from_selection():
|
||||||
return "done"
|
return "done"
|
||||||
if a == "delete":
|
elif a == "delete":
|
||||||
if sel.delete_url(data.get('tag'), data.get('url')):
|
if sel.delete_url(data.get('tag'), data.get('url')):
|
||||||
return "ok"
|
return "ok"
|
||||||
|
elif a == "export":
|
||||||
|
pass
|
||||||
return "-"
|
return "-"
|
||||||
|
|
||||||
|
@app.route('/export', methods = ['POST'])
def export():
    """Handle export requests posted by the tags page.

    Form fields:
      action     -- "export" (single tag) or "export_all"
      tagid      -- tag to export (action "export")
      exportpath -- optional output path (action "export")

    Returns 'ok' when the export succeeded and "-" otherwise.
    """
    data = request.form
    a = data.get('action')

    if a == "export":
        tagid = data.get('tagid')
        if tagid:
            # Only override the default output path when the form supplied
            # one; passing None would replace the default.
            # NOTE(review): exportpath comes straight from the client and is
            # opened for writing; consider restricting it to the export
            # directory.
            kwargs = {}
            exportpath = data.get('exportpath')
            if exportpath:
                kwargs['xml_out'] = exportpath
            if exportxml.export_selection_tag(tagid, **kwargs):
                return 'ok'
    elif a == "export_all":
        if exportxml.export_selection_all():
            return 'ok'

    return "-"
|
||||||
|
|
||||||
|
@app.route('/ex/<path:fn>')
def ex(fn):
    """Show the list of exports (``/ex/all``) or render one exported chapter.

    fn -- "all", or the path of an exported XML file

    Falls back to a plain failure message when the file cannot be loaded or
    does not contain a <chapter> root.
    """
    import html

    if fn == "all":
        # list_all returns None when the export directory is missing.
        return render_template("ex_all.html", files=exportlist.list_all() or [])

    # NOTE(review): fn is a client-supplied filesystem path; consider
    # restricting it to the export directory.
    e = exportlist.get(fn)
    if e and 'chapter' in e:
        return render_template("chapter.html", chapter=e['chapter'])

    # fn is echoed into an HTML response, so it has to be escaped.
    return "Request for " + html.escape(fn) + " failed..."
|
||||||
|
|
||||||
@app.route('/report')
|
@app.route('/report')
|
||||||
def report():
|
def report():
|
||||||
|
|||||||
@ -40,3 +40,21 @@ bb {
|
|||||||
li button {
|
li button {
|
||||||
margin-top: 0em;
|
margin-top: 0em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.header {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.payload {
|
||||||
|
white-space: pre-wrap;
|
||||||
|
font-family: monospace;
|
||||||
|
display: block;
|
||||||
|
unicode-bidi: embed;
|
||||||
|
border: 2px solid red;
|
||||||
|
word-wrap: break-word;
|
||||||
|
word-break: break-all;
|
||||||
|
}
|
||||||
|
|
||||||
|
.mail {
|
||||||
|
width: 35em;
|
||||||
|
}
|
||||||
|
|||||||
@ -26,4 +26,29 @@ $(document).ready(function(){
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Export a single tag: posts the tag form to /export and reports the outcome in #status.
$('.export').click(function(e) {
    // The "export" class is also set on the export-path text field;
    // only the submit button may start an export.
    if (!$(this).is(':submit')) {
        return;
    }
    // Keep the browser from submitting the form itself and leaving the page.
    e.preventDefault();

    var form = $(this).parent("form");
    var tagid = form.data('tagid');
    // tagid is free text: encode it so characters such as "&" or spaces
    // do not corrupt the query string.
    var g = form.serialize() + "&tagid=" + encodeURIComponent(tagid) + "&action=export";
    $('#status').text("Exporting - " + tagid);
    $.post('/export', g, function(d) {
        if (d === 'ok') {
            $('#status').text("Exporting - " + tagid + " - success!!!");
        } else {
            $('#status').text("Exporting - " + tagid + " - error...");
        }
    });
});
|
||||||
|
|
||||||
|
// Export every tag at once: posts action=export_all to /export and reports the outcome in #status.
$('#export_all').click(function(a){
    var status = $('#status');
    status.text("Exporting - all");
    $.post('/export', "&action=export_all", function(reply) {
        var outcome = (reply === 'ok') ? "Exporting - all - success!!!" : "Exporting - all - error...";
        status.text(outcome);
    });
});
|
||||||
|
|
||||||
});
|
});
|
||||||
27
www/templates/chapter.html
Normal file
27
www/templates/chapter.html
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Chapter: {{chapter.title}}</title>
|
||||||
|
<link rel="stylesheet" href="{{ url_for('static',filename='lestyle.css') }}">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>{{chapter.title}}</h1>
|
||||||
|
<h2>{{chapter.desc}}</h2>
|
||||||
|
<div id="all">
|
||||||
|
{% for m in chapter.mails.mail %}
|
||||||
|
<div class="mail">
|
||||||
|
<hr>
|
||||||
|
<div class="header">
|
||||||
|
<div class="field">From: {{m.from}}</div>
|
||||||
|
<div class="field">To: {{m.to}}</div>
|
||||||
|
<div class="field">Date: {{m.date}}</div>
|
||||||
|
<div class="field">Subject: {{m.subject}}</div>
|
||||||
|
</div>
|
||||||
|
<div class="payload">
|
||||||
|
{{m.content}}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
16
www/templates/ex_all.html
Normal file
16
www/templates/ex_all.html
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Exports [all]</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Exports [all] - [<a href="/search" target="_blank">SEARCH</a>, <a href="/tags" target="_blank">TAGS</a>]</h1>
|
||||||
|
<div id="all">
|
||||||
|
<ul>
|
||||||
|
{% for f in files %}
|
||||||
|
<li><a href="/ex/{{f}}">{{f}}</a></li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@ -7,11 +7,10 @@
|
|||||||
<script type="text/javascript" src="{{ url_for('static',filename='tags_w_lists.js') }}"></script>
|
<script type="text/javascript" src="{{ url_for('static',filename='tags_w_lists.js') }}"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h1>Selection [tags w lists]</h1>
|
<h1>Selection [tags w lists] - [<a href="/search" target="_blank">SEARCH</a>, <a href="/tags" target="_blank">TAGS</a>]</h1>
|
||||||
<hr>
|
<hr>
|
||||||
<h2><a href="/search" target="_blank">SEARCH</a></h2>
|
<div id="status">Ok</div>
|
||||||
<h2><a href="/tags" target="_blank">TAGS</a></h2>
|
<button id="export_all">export all</button>
|
||||||
<!-- <button id="commit">commit</button> -->
|
|
||||||
<div id="all">
|
<div id="all">
|
||||||
{% for v in tags %}
|
{% for v in tags %}
|
||||||
<hr>
|
<hr>
|
||||||
@ -19,8 +18,10 @@
|
|||||||
<form class="tag_item" id="{{v.tag}}" method="post" data-tagid="{{v.tag}}">
|
<form class="tag_item" id="{{v.tag}}" method="post" data-tagid="{{v.tag}}">
|
||||||
<input name="tag" class="tagedit" type="text" value="{{v.tag}}">
|
<input name="tag" class="tagedit" type="text" value="{{v.tag}}">
|
||||||
<textarea name="desc">{{v.desc}}</textarea>
|
<textarea name="desc">{{v.desc}}</textarea>
|
||||||
|
<input name="exportpath" class="tagedit export" type="text" value="{{v.export}}">
|
||||||
<input type="submit" class="update" value="update">
|
<input type="submit" class="update" value="update">
|
||||||
<input type="submit" class="delete" value="delete">
|
<input type="submit" class="delete" value="delete">
|
||||||
|
<input type="submit" class="export" value="export">
|
||||||
</form>
|
</form>
|
||||||
<lists>
|
<lists>
|
||||||
<ul>
|
<ul>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user