2019-12-09 13:45:24 +01:00
|
|
|
import os, json, glob, logging
|
2019-12-21 14:13:16 +01:00
|
|
|
from selection import strutil
|
2019-12-23 14:54:12 +01:00
|
|
|
from threading import Lock
|
|
|
|
|
import config
|
2019-12-26 18:12:49 +01:00
|
|
|
import export.exportxml
|
2019-12-23 14:54:12 +01:00
|
|
|
|
2019-12-26 12:46:45 +01:00
|
|
|
# Absolute path of the selection index file (tag -> {"desc": ..., "lists": [...]}).
sel = os.path.join(config.selection['path'], config.selection['sel'])

# Absolute path of the selection dump file (full message threads per tag).
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])

# Serializes the read-modify-write cycles on the two JSON files above.
LL = Lock()
|
|
|
|
|
|
2019-12-26 11:42:35 +01:00
|
|
|
# TAGS
|
2019-12-23 14:54:12 +01:00
|
|
|
def update(tag, newtag, newdesc):
    """Rename a tag and/or update its description in sel and sel_dump.

    Returns False when *tag* does not exist in the selection, True otherwise.
    """
    with LL:
        d = load_selection()
        if tag not in d:
            return False
        if newtag != tag:
            d[newtag] = d.pop(tag)
        # BUG FIX: the original only applied newdesc when the tag name was
        # unchanged, silently dropping the new description on a rename.
        d[newtag]['desc'] = newdesc
        write_selection(d)

        sd = load_selection_dump()
        if tag not in sd:
            logging.warning("possible inconsistency between sel and sel_dump...")
        else:
            if newtag != tag:
                sd[newtag] = sd.pop(tag)
            sd[newtag]['desc'] = newdesc
            write_selection_dump(sd)

        return True
|
|
|
|
|
|
|
|
|
|
def delete(tag):
    """Delete *tag* from both sel and sel_dump.

    Returns False when the tag is unknown in the selection, True otherwise.
    """
    with LL:
        d = load_selection()
        if tag not in d:
            return False
        del d[tag]
        write_selection(d)

        sd = load_selection_dump()
        if tag not in sd:
            logging.warning("possible inconsistency between sel and sel_dump...")
        else:
            del sd[tag]
            write_selection_dump(sd)

        return True
|
|
|
|
|
|
|
|
|
|
def new(tag, desc):
    """Create *tag* with description *desc* in sel and sel_dump.

    Returns False when the tag already exists, True otherwise.
    """
    with LL:
        d = load_selection()
        if tag in d:
            return False

        d[tag] = {"desc": desc, "lists": []}
        write_selection(d)

        sd = load_selection_dump()
        sd[tag] = {"desc": desc, "lists": []}
        write_selection_dump(sd)

        return True
|
2019-12-09 13:45:24 +01:00
|
|
|
|
2019-12-26 11:42:35 +01:00
|
|
|
# URL
|
|
|
|
|
def delete_url(tag, url):
    """Remove the message identified by *url* from *tag* in sel and sel_dump.

    In the dump, the removal is recursive (the url may sit inside a thread).
    Returns False when the tag is unknown, True otherwise (even when no
    message matched the url).
    """
    with LL:
        d = load_selection()
        if tag not in d:
            return False

        for entry in d[tag]['lists']:
            if entry['url'] == url:
                d[tag]['lists'].remove(entry)
                break
        write_selection(d)

        sd = load_selection_dump()
        if tag not in sd:
            logging.warning("possible inconsistency between sel and sel_dump...")
        else:
            for thread in sd[tag]['lists']:
                if recursive_delete(thread, sd[tag]['lists'], url):
                    break
            write_selection_dump(sd)

        return True
|
|
|
|
|
|
2019-12-09 13:45:24 +01:00
|
|
|
|
2019-12-21 14:13:16 +01:00
|
|
|
def load_selection():
    """Load and return the selection index from the sel JSON file."""
    with open(sel, encoding='utf-8') as f:
        return json.load(f)
|
|
|
|
|
|
|
|
|
|
def load_selection_dump():
    """Load and return the selection dump from the sel_dump JSON file."""
    with open(sel_dump, encoding='utf-8') as f:
        return json.load(f)
|
|
|
|
|
|
2019-12-23 14:54:12 +01:00
|
|
|
def write_selection(d):
    """Persist the selection index *d* to the sel JSON file."""
    # ensure_ascii=False keeps non-ASCII subjects/authors readable and makes
    # this writer consistent with commit_selection / commit_dump /
    # commit_from_selection, which all write with ensure_ascii=False.
    with open(sel, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)
|
|
|
|
|
|
|
|
|
|
def write_selection_dump(d):
    """Persist the selection dump *d* to the sel_dump JSON file."""
    # ensure_ascii=False for consistency with the other dump writers in this
    # module (commit_dump, commit_from_selection, reorder_selection_orphans).
    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)
|
2019-12-09 13:45:24 +01:00
|
|
|
|
|
|
|
|
def lists():
    """Names of the archived mailing lists (one directory per list)."""
    return os.listdir(config.archives)
|
2019-12-09 13:45:24 +01:00
|
|
|
|
|
|
|
|
def tags():
    """Return [{'tag': ..., 'desc': ...}] for every tag in the selection."""
    return [{'tag': name, 'desc': entry['desc']}
            for name, entry in load_selection().items()]
|
|
|
|
|
|
|
|
|
|
def tags_list():
    """Return the list of tag names in the selection."""
    return list(load_selection().keys())
|
|
|
|
|
|
2019-12-23 14:54:12 +01:00
|
|
|
|
|
|
|
|
def tags_w_lists():
    """Return every tag with its description, hierarchical message summaries
    and the generated export path for that tag."""
    dump = load_selection_dump()
    result = []
    for tag, entry in dump.items():
        summaries = []
        for thread in entry['lists']:
            summaries += recursive_info(thread, keep_hierachy=True)
        result.append({
            'tag': tag,
            'desc': entry['desc'],
            'lists': summaries,
            'export': export.exportxml.export_generate_path(tag),
        })
    return result
|
|
|
|
|
|
2019-12-21 14:13:16 +01:00
|
|
|
def recursive_find(msg, li, url):
    """Depth-first search for *url* in the thread rooted at *msg*.

    On a hit, the matching message is tagged with its list name under the
    'list' key, and the enclosing (parent) thread node is returned so the
    caller ultimately receives the top-level thread.  Returns None when the
    url is not present.
    """
    if msg['url'] == url:
        msg['list'] = li  # <-- taggin
        return msg
    for child in msg.get('follow-up', []):
        if recursive_find(child, li, url) is not None:
            return msg  # <-- parent thread
    return None
|
2019-12-09 13:45:24 +01:00
|
|
|
|
2019-12-26 11:42:35 +01:00
|
|
|
def recursive_delete(msg, parent_list, url):
    """Remove the message whose 'url' equals *url* from the thread rooted at
    *msg*, where *parent_list* is the list currently containing *msg*.

    Returns True when a message was removed, False otherwise.
    """
    if msg['url'] == url:
        parent_list.remove(msg)
        return True
    removed = False
    # BUG FIX: iterate over a snapshot — the recursive call removes items
    # from msg['follow-up'] itself, and mutating a list while iterating it
    # silently skips the element right after the removed one.
    for child in list(msg.get('follow-up', [])):
        removed = recursive_delete(child, msg['follow-up'], url) or removed
    return removed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2019-12-09 13:45:24 +01:00
|
|
|
def find(li, url):
    """Scan the JSON archive files of mailing list *li* for the thread that
    contains *url*; return that thread (tagged via recursive_find) or None."""
    archive_dir = os.path.join(config.archives, li)

    if not os.path.isdir(archive_dir):
        logging.warning("Invalid archive path: " + archive_dir)
        print("Invalid archive path: " + archive_dir)
        return None

    for path in glob.glob(os.path.join(archive_dir, "*.json")):
        with open(path, encoding='utf-8') as fp:
            archive = json.load(fp)
            for msg in archive['threads']:
                hit = recursive_find(msg, li, url)
                if hit is not None:
                    return hit

    return None
|
|
|
|
|
|
|
|
|
|
def recursive_urls(msg):
    """Collect the urls of *msg* and all of its follow-ups, depth-first."""
    urls = [msg['url']]
    for child in msg.get('follow-up', []):
        urls.extend(recursive_urls(child))
    return urls
|
|
|
|
|
|
2019-12-23 14:54:12 +01:00
|
|
|
# <li><a href="' + h.url+ '" target="_blank">' + h.subject + '</a> -- <i>' + h.author_name + '</i>
|
2019-12-26 12:12:07 +01:00
|
|
|
def recursive_info(msg, keep_hierachy=False):
    """Summarize the thread rooted at *msg* as url/subject/author_name dicts.

    With keep_hierachy=False (default) the whole thread is flattened into one
    list.  With keep_hierachy=True a single-element list is returned whose
    entry carries its flattened descendants under the 'follow' key.
    """
    # BUG FIX: this was a stray debugging print() that spammed stdout on
    # every call (including from tags_w_lists); demoted to debug logging.
    logging.debug("%s", msg['url'])

    info = {'url': msg['url'], 'subject': msg['subject'],
            'author_name': msg['author_name']}

    children = []
    for child in msg.get('follow-up', []):
        children += recursive_info(child)

    if keep_hierachy:
        info['follow'] = children  # note: change of field name
        return [info]
    return [info] + children
|
|
|
|
|
|
2019-12-09 13:45:24 +01:00
|
|
|
def commit_selection(li, url, tag):
    """Record (li, url) under *tag* in the sel file.

    Returns False when the url is already committed to the tag, True after
    appending and rewriting the file.

    NOTE(review): assumes *tag* already exists in the selection (raises
    KeyError otherwise) and, unlike update/delete/new, takes no lock —
    confirm callers guarantee both.
    """
    d = load_selection()

    if any(entry['url'] == url for entry in d[tag]['lists']):
        return False

    d[tag]['lists'].append({'list': li, 'url': url})

    with open(sel, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)

    return True
|
|
|
|
|
|
|
|
|
|
def commit_dump(li, url, tag):
    """Commit (li, url) to the selection, then copy the full thread found in
    the archives into the selection dump.

    Returns the list of urls belonging to the committed thread, or None when
    the url was already committed or the thread could not be found.
    """
    if not commit_selection(li, url, tag):
        return None

    m = find(li, url)  # <--- time
    if m is None:
        return None

    dump = load_selection_dump()

    if tag not in dump:
        # BUG FIX: the original referenced an undefined name `desc` here,
        # raising NameError on the first commit to a tag missing from the
        # dump.  Pull the description from the main selection instead.
        desc = load_selection().get(tag, {}).get('desc', '')
        dump[tag] = {"desc": desc, "lists": []}

    m['list'] = li
    dump[tag]['lists'].append(m)

    with open(sel_dump, 'w+', encoding='utf-8') as fout:
        json.dump(dump, fout, ensure_ascii=False, indent=4)

    return recursive_urls(m)
|
|
|
|
|
|
|
|
|
|
def commit_from_selection():
    """Rebuild the whole selection dump from the selection index by looking
    every committed (list, url) pair up in the archives."""
    d = load_selection()
    dump = {}

    for tag, entry in d.items():
        dump[tag] = {'desc': entry['desc'], 'lists': []}
        for item in entry['lists']:
            m = find(item['list'], item['url'])  # <--- time
            if m is not None:
                m['list'] = item['list']
                dump[tag]['lists'].append(m)

    with open(sel_dump, 'w+', encoding='utf-8') as f:
        json.dump(dump, f, ensure_ascii=False, indent=4)

    return True
|
2019-12-09 13:45:24 +01:00
|
|
|
|
2019-12-23 14:54:12 +01:00
|
|
|
# def report():
|
2019-12-21 14:13:16 +01:00
|
|
|
|
2019-12-23 14:54:12 +01:00
|
|
|
# d = load_selection()
|
|
|
|
|
|
|
|
|
|
# re = "Report: \n"
|
|
|
|
|
# for k, v in d.items():
|
|
|
|
|
# lre = {}
|
|
|
|
|
# for i in v:
|
|
|
|
|
# if i['list'] not in lre:
|
|
|
|
|
# lre[i['list']] = 0
|
|
|
|
|
# lre[i['list']] += 1
|
|
|
|
|
# re += "<" + k + ">: " + str(len(v)) + " ("
|
|
|
|
|
# for kk, vv in lre.items():
|
|
|
|
|
# re += kk + ": " + str(vv) + " / "
|
|
|
|
|
# re += ")\n"
|
|
|
|
|
|
|
|
|
|
# return re
|
2019-12-09 13:45:24 +01:00
|
|
|
|
|
|
|
|
def recursive_format(msg):
    """Strip bulky/identifying fields from *msg* in place (recursively),
    replacing 'content' with its length under the 'len' key.

    Robustness fix: the original popped 'id', 'content' and 'content-type'
    unguarded (KeyError on messages missing them) while guarding 'to' and
    'message-id'; all pops are now tolerant of missing keys.
    """
    msg.pop('id', None)
    # Record the content size before discarding the content itself.
    msg['len'] = len(msg.pop('content', ''))
    msg.pop('content-type', None)
    msg.pop('to', None)
    msg.pop('message-id', None)
    for child in msg.get('follow-up', []):
        recursive_format(child)
|
|
|
|
|
|
|
|
|
|
def format_selection():
    """Return the selection dump with every message thread stripped down by
    recursive_format (content replaced by its length, ids removed)."""
    d = load_selection_dump()
    for entry in d.values():
        for thread in entry['lists']:
            recursive_format(thread)
    return d
|
|
|
|
|
|
|
|
|
|
def recursive_hashmap(msg, tag, hm):
    """Map every url in the thread rooted at *msg* to *tag* in dict *hm*."""
    hm[msg['url']] = tag
    for child in msg.get('follow-up', []):
        recursive_hashmap(child, tag, hm)
|
|
|
|
|
|
|
|
|
|
def hashmap():
    """Build and return a url -> tag lookup table over the whole dump."""
    hm = {}
    for tag, entry in load_selection_dump().items():
        for thread in entry['lists']:
            recursive_hashmap(thread, tag, hm)
    return hm
|
|
|
|
|
|
|
|
|
|
# Ad-hoc smoke test: pretty-print the stripped-down selection dump.
# NOTE(review): this guard sits mid-file (reorder_selection_orphans is
# defined below it) — harmless, but unconventional placement; confirm.
if __name__ == "__main__":

    d = format_selection()
    print(json.dumps(d, indent=4, sort_keys=True))
|
2019-12-21 14:13:16 +01:00
|
|
|
|
|
|
|
|
def reorder_selection_orphans(tag):
    """Attach orphan messages (no 'follow-up' key) in *tag*'s dump entry to
    the existing thread with the most similar subject, then rewrite sel_dump.

    An orphan is only attached when its best cosine similarity exceeds 0.1;
    otherwise it stays a top-level entry.
    """
    d = load_selection_dump()
    if tag not in d:
        return

    msgs = d[tag]['lists']
    threads = [m for m in msgs if 'follow-up' in m]
    orphans = [m for m in msgs if 'follow-up' not in m]

    for o in orphans:
        if not threads:
            # BUG FIX: the original indexed threads[0] unconditionally and
            # raised IndexError when the tag had orphans but no threads.
            break
        subject = o['subject']
        # Most similar subject first.
        threads.sort(key=lambda t: strutil.cosine_dist(t['subject'], subject),
                     reverse=True)

        if strutil.cosine_dist(threads[0]['subject'], subject) > 0.1:
            # threads[0] is the same object that lives in msgs, so appending
            # to it directly updates the dump (the original did a redundant
            # msgs[msgs.index(threads[0])] identity lookup).
            threads[0]['follow-up'].append(o)
            msgs.remove(o)

    d[tag]['lists'] = msgs
    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2019-12-09 13:45:24 +01:00
|
|
|
|