386 lines
7.4 KiB
Python
Raw Normal View History

2019-12-09 13:45:24 +01:00
import os, json, glob, logging
2019-12-21 14:13:16 +01:00
from selection import strutil
2019-12-23 14:54:12 +01:00
from threading import Lock
import config
2019-12-26 18:12:49 +01:00
import export.exportxml
2019-12-23 14:54:12 +01:00
2019-12-26 12:46:45 +01:00
sel = os.path.join(config.selection['path'], config.selection['sel'])
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
2019-12-23 14:54:12 +01:00
LL = Lock()
2019-12-26 11:42:35 +01:00
# TAGS
2019-12-23 14:54:12 +01:00
def update(tag, newtag, newdesc):
    """Rename a tag and/or update its description in both the selection
    file and the selection dump.

    Returns True on success, False when *tag* does not exist in the
    selection. A tag missing from the dump only logs a warning, since
    the dump is derived data that may lag behind the selection.
    """
    with LL:
        d = load_selection()
        if tag not in d:
            return False
        # BUG FIX: the original applied *either* the rename *or* the new
        # description — renaming a tag silently dropped newdesc. Move the
        # entry when the name changed, then always refresh the description.
        if newtag != tag:
            d[newtag] = d.pop(tag)
        d[newtag]['desc'] = newdesc
        write_selection(d)
        sd = load_selection_dump()
        if tag not in sd:
            logging.warning("possible inconsistency between sel and sel_dump...")
        else:
            if newtag != tag:
                sd[newtag] = sd.pop(tag)
            sd[newtag]['desc'] = newdesc
        write_selection_dump(sd)
        return True
def delete(tag):
    """Remove *tag* (with all its selected messages) from the selection
    and the selection dump. Returns False when the tag is unknown."""
    with LL:
        selection = load_selection()
        if tag not in selection:
            return False
        del selection[tag]
        write_selection(selection)
        dump = load_selection_dump()
        if tag in dump:
            del dump[tag]
        else:
            logging.warning("possible inconsistency between sel and sel_dump...")
        write_selection_dump(dump)
        return True
def new(tag, desc):
    """Create a fresh, empty tag in both files.

    Returns False (and changes nothing) when the tag already exists.
    """
    with LL:
        selection = load_selection()
        if tag in selection:
            return False
        selection[tag] = {"desc": desc, "lists": []}
        write_selection(selection)
        dump = load_selection_dump()
        dump[tag] = {"desc": desc, "lists": []}
        write_selection_dump(dump)
        return True
2019-12-09 13:45:24 +01:00
2019-12-26 11:42:35 +01:00
# URL
def delete_url(tag, url):
    """Remove message *url* from *tag*: flat removal in the selection
    file, recursive removal in the threaded selection dump."""
    with LL:
        selection = load_selection()
        if tag not in selection:
            return False
        entries = selection[tag]['lists']
        match = next((e for e in entries if e['url'] == url), None)
        if match is not None:
            entries.remove(match)
        write_selection(selection)
        dump = load_selection_dump()
        if tag not in dump:
            logging.warning("possible inconsistency between sel and sel_dump...")
        else:
            for thread in dump[tag]['lists']:
                if recursive_delete(thread, dump[tag]['lists'], url):
                    break
        write_selection_dump(dump)
        return True
2019-12-09 13:45:24 +01:00
2019-12-21 14:13:16 +01:00
def load_selection():
    """Parse and return the selection JSON file (tag -> {desc, lists})."""
    with open(sel, encoding='utf-8') as fp:
        return json.load(fp)
def load_selection_dump():
    """Parse and return the selection-dump JSON file (full thread data)."""
    with open(sel_dump, encoding='utf-8') as fp:
        return json.load(fp)
2019-12-23 14:54:12 +01:00
def write_selection(d):
    """Serialize *d* back to the selection file, pretty-printed."""
    with open(sel, 'w+', encoding='utf-8') as fp:
        json.dump(d, fp, indent=4)
def write_selection_dump(d):
    """Serialize *d* back to the selection-dump file, pretty-printed."""
    with open(sel_dump, 'w+', encoding='utf-8') as fp:
        json.dump(d, fp, indent=4)
2019-12-09 13:45:24 +01:00
def lists():
    """Names of the archived mailing lists (entries under config.archives)."""
    return os.listdir(config.archives)
2019-12-09 13:45:24 +01:00
def tags():
    """Return [{'tag': name, 'desc': description}, ...] for every tag."""
    return [{'tag': name, 'desc': info['desc']}
            for name, info in load_selection().items()]
def tags_list():
    """Return the tag names only."""
    return list(load_selection())
2019-12-23 14:54:12 +01:00
def tags_w_lists():
    """Tags with their message summaries (one-level hierarchy) and the
    generated export path for each tag."""
    result = []
    for name, info in load_selection_dump().items():
        entry = {'tag': name, 'desc': info['desc']}
        messages = []
        for thread in info['lists']:
            messages += recursive_info(thread, keep_hierachy=True)
        entry['lists'] = messages
        entry['export'] = export.exportxml.export_generate_path(name)
        result.append(entry)
    return result
2019-12-21 14:13:16 +01:00
def recursive_find(msg, li, url):
    """Search *msg* and its follow-ups for *url*.

    On a hit, the matching message is tagged in place with its list name
    under the 'list' key, and the message itself (direct hit) or the
    enclosing parent thread (nested hit) is returned. Returns None when
    the url is not in this thread.
    """
    if msg['url'] == url:
        msg['list'] = li  # tag the hit with the archive it came from
        return msg
    for child in msg.get('follow-up', []):
        if recursive_find(child, li, url) is not None:
            return msg  # return the enclosing (parent) thread
    return None
2019-12-09 13:45:24 +01:00
2019-12-26 11:42:35 +01:00
def recursive_delete(msg, parent_list, url):
    """Delete the message with *url* from a thread, wherever it nests.

    *msg* is removed from *parent_list* when it is the match; otherwise
    its follow-ups are searched recursively. Returns True when a message
    was deleted anywhere in the subtree.
    """
    if msg['url'] == url:
        parent_list.remove(msg)
        return True
    deleted = False
    # BUG FIX: iterate over a snapshot — the recursive call may remove an
    # element from msg['follow-up'], and mutating a list while iterating
    # it silently skips the next sibling (the original did exactly that).
    for child in list(msg.get('follow-up', [])):
        deleted = recursive_delete(child, msg['follow-up'], url) or deleted
    return deleted
2019-12-09 13:45:24 +01:00
def find(li, url):
    """Scan every JSON archive file of list *li* for the thread that
    contains *url*.

    Returns the thread found by recursive_find (tagged with the list
    name), or None when the archive directory is invalid or no file
    contains the url.
    """
    archive_dir = os.path.join(config.archives, li)
    if not os.path.isdir(archive_dir):
        logging.warning("Invalid archive path: " + archive_dir)
        print("Invalid archive path: " + archive_dir)
        return None
    for path in glob.glob(os.path.join(archive_dir, "*.json")):
        with open(path, encoding='utf-8') as fp:
            data = json.load(fp)
        for thread in data['threads']:
            hit = recursive_find(thread, li, url)
            if hit is not None:
                return hit
    return None
def recursive_urls(msg):
    """Flat list of the urls of *msg* and all of its follow-ups,
    depth-first, parent before children."""
    urls = [msg['url']]
    for child in msg.get('follow-up', []):
        urls.extend(recursive_urls(child))
    return urls
2019-12-23 14:54:12 +01:00
# <li><a href="' + h.url+ '" target="_blank">' + h.subject + '</a> -- <i>' + h.author_name + '</i>
2019-12-26 12:12:07 +01:00
def recursive_info(msg, keep_hierachy=False):
    """Summarize a thread as [{'url', 'subject', 'author_name'}, ...].

    Flat mode (default) returns the message and all descendants in one
    list. With keep_hierachy=True the descendants are instead attached
    under the top entry's 'follow' key (note: field renamed from the
    archive's 'follow-up').

    BUG FIX: removed a leftover debug print(msg['url']) that fired on
    every message.
    """
    info = [{'url': msg['url'], 'subject': msg['subject'],
             'author_name': msg['author_name']}]
    children = []
    for child in msg.get('follow-up', []):
        # Descendants are always collected flat; keep_hierachy only decides
        # whether they hang off the top entry or extend the result list.
        # TODO confirm: the recursion deliberately does not propagate
        # keep_hierachy, so sub-sub-threads are flattened under the top.
        children += recursive_info(child)
    if keep_hierachy:
        info[0]['follow'] = children
    else:
        info += children
    return info
2019-12-09 13:45:24 +01:00
def commit_selection(li, url, tag):
    """Record the pair (list, url) under *tag* in the selection file.

    Returns False when the url is already selected for this tag, True
    after the file has been rewritten. Raises KeyError when *tag* does
    not exist in the selection.
    """
    selection = load_selection()
    if any(entry['url'] == url for entry in selection[tag]['lists']):
        return False
    selection[tag]['lists'].append({'list': li, 'url': url})
    with open(sel, 'w', encoding='utf-8') as fp:
        json.dump(selection, fp, ensure_ascii=False, indent=4)
    return True
def commit_dump(li, url, tag):
    """Commit (li, url) to the selection, then copy the full thread from
    the archives into the selection dump.

    Returns the list of all urls in the committed thread, or None when
    the url was already selected or cannot be found in the archives.
    """
    if not commit_selection(li, url, tag):
        return None
    m = find(li, url)  # <--- time
    if m is None:
        return None
    dump = load_selection_dump()
    if tag not in dump:
        # BUG FIX: the original referenced an undefined name `desc` here
        # (NameError). Recover the description from the selection file,
        # which commit_selection just guaranteed contains *tag*.
        desc = load_selection()[tag]['desc']
        dump[tag] = {"desc": desc, "lists": []}
    m['list'] = li
    dump[tag]['lists'].append(m)
    with open(sel_dump, 'w+', encoding='utf-8') as fout:
        json.dump(dump, fout, ensure_ascii=False, indent=4)
    return recursive_urls(m)
def commit_from_selection():
    """Rebuild the entire selection dump from scratch by resolving every
    selected url against the archives. Returns True."""
    dump = {}
    for tag, info in load_selection().items():
        dump[tag] = {'desc': info['desc'], 'lists': []}
        for entry in info['lists']:
            thread = find(entry['list'], entry['url'])  # <--- time
            if thread is not None:
                thread['list'] = entry['list']
                dump[tag]['lists'].append(thread)
    with open(sel_dump, 'w+', encoding='utf-8') as fp:
        json.dump(dump, fp, ensure_ascii=False, indent=4)
    return True
2019-12-09 13:45:24 +01:00
2019-12-23 14:54:12 +01:00
# def report():
2019-12-21 14:13:16 +01:00
2019-12-23 14:54:12 +01:00
# d = load_selection()
# re = "Report: \n"
# for k, v in d.items():
# lre = {}
# for i in v:
# if i['list'] not in lre:
# lre[i['list']] = 0
# lre[i['list']] += 1
# re += "<" + k + ">: " + str(len(v)) + " ("
# for kk, vv in lre.items():
# re += kk + ": " + str(vv) + " / "
# re += ")\n"
# return re
2019-12-09 13:45:24 +01:00
def recursive_format(msg):
    """Strip a message (in place) down to summary fields for display.

    Drops id/content/content-type (required keys) and to/message-id
    (when present), and records the content length under 'len'.
    Recurses into all follow-ups.
    """
    del msg['id']
    msg['len'] = len(msg['content'])
    del msg['content']
    del msg['content-type']
    msg.pop('to', None)
    msg.pop('message-id', None)
    for child in msg.get('follow-up', []):
        recursive_format(child)
def format_selection():
    """Load the selection dump and reduce every thread in it to display
    summaries (via recursive_format). Returns the reduced dict."""
    dump = load_selection_dump()
    for info in dump.values():
        for thread in info['lists']:
            recursive_format(thread)
    return dump
def recursive_hashmap(msg, tag, hm):
    """Map the url of *msg* and of every follow-up to *tag* in dict *hm*
    (mutated in place)."""
    hm[msg['url']] = tag
    for child in msg.get('follow-up', []):
        recursive_hashmap(child, tag, hm)
def hashmap():
    """Build a url -> tag lookup table from the whole selection dump."""
    hm = {}
    for tag, info in load_selection_dump().items():
        for thread in info['lists']:
            recursive_hashmap(thread, tag, hm)
    return hm
# Ad-hoc CLI entry point: print the formatted selection dump to stdout.
# NOTE(review): this guard sits mid-file; the definitions below it are
# still created on import, but moving it to the end would be clearer.
if __name__ == "__main__":
    d = format_selection()
    print(json.dumps(d, indent=4, sort_keys=True))
2019-12-21 14:13:16 +01:00
def reorder_selection_orphans(tag):
    """Re-attach orphan messages (those without a 'follow-up' key) of
    *tag* to the existing thread with the most similar subject, then
    rewrite the selection dump.

    An orphan is appended to the best-matching thread's follow-ups only
    when the cosine similarity of the subjects exceeds 0.1; otherwise it
    stays top-level. No-op when the tag is unknown.
    """
    d = load_selection_dump()
    if tag not in d:
        return
    msgs = d[tag]['lists']
    threads = [m for m in msgs if 'follow-up' in m]
    orphans = [m for m in msgs if 'follow-up' not in m]
    # BUG FIX: the original indexed threads[0] unconditionally and raised
    # IndexError when a tag had orphans but no threads at all.
    if threads:
        for orphan in orphans:
            subject = orphan['subject']
            # Pick the single best-matching thread instead of re-sorting
            # the whole list for every orphan (same winner: max() returns
            # the first maximum, like a stable reverse sort's element 0).
            best = max(threads,
                       key=lambda t: strutil.cosine_dist(t['subject'], subject))
            if strutil.cosine_dist(best['subject'], subject) > 0.1:
                best['follow-up'].append(orphan)
                msgs.remove(orphan)
    d[tag]['lists'] = msgs
    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)
2019-12-09 13:45:24 +01:00