2019-12-21 14:13:16 +01:00

238 lines
4.1 KiB
Python

import os, json, glob, logging
from selection import strutil
# Directory listed by lists(); find() joins it with a list name to locate
# that list's "*.json" archive files.
ARCH = "archives/"
# Directory holding the two selection files below.
EXP = "selection/"
# Selection file: read by load_selection() and rewritten by commit_selection().
# Despite the ".js" extension it is parsed and written as plain JSON.
sel = os.path.join(EXP, "tm-selection.js")
# Selection dump: read by load_selection_dump() and rewritten by commit_dump(),
# commit_from_selection() and reorder_selection_orphans(). Also plain JSON.
sel_dump = os.path.join(EXP, "tm-selection-dump.js")
def load_selection():
    """Read the selection file and return its parsed JSON content.

    Returns:
        The value produced by ``json.load`` for the file at ``sel``. The
        other functions of this module use it as a dict mapping each tag to
        a list of ``{'list': ..., 'url': ...}`` entries.
    """
    with open(sel, encoding='utf-8') as handle:
        return json.load(handle)
def load_selection_dump():
    """Read the selection dump file and return its parsed JSON content.

    Returns:
        The value produced by ``json.load`` for the file at ``sel_dump``.
        The other functions of this module use it as a dict mapping each
        tag to a list of message dicts.
    """
    with open(sel_dump, encoding='utf-8') as handle:
        return json.load(handle)
def lists():
    """Return the names of the entries found directly inside ``ARCH``."""
    archive_entries = os.listdir(ARCH)
    return archive_entries
def tags():
    """Return the tags (top-level keys) of the selection file as a list."""
    selection = load_selection()
    return [name for name in selection.keys()]
def recursive_find(msg, li, url):
    """Search a message tree for ``url`` and return the tree's root.

    Args:
        msg: Message dict with a 'url' key and, optionally, a 'follow-up'
            list of reply dicts of the same shape.
        li: List name stored under the 'list' key of the matching message.
        url: URL to look for.

    Returns:
        ``msg`` itself when ``url`` belongs to ``msg`` or to any message
        nested below it (callers therefore receive the enclosing thread,
        not the nested reply), otherwise ``None``.

    Side effects:
        The message whose 'url' equals ``url`` gets ``msg['list'] = li``.
        Enclosing messages are left untouched.
    """
    if msg['url'] == url:
        msg['list'] = li  # tag the matching message with its list name
        return msg
    # A missing or null 'follow-up' simply means "no replies"; testing the
    # dict directly avoids building a list of keys at every recursion step.
    for reply in msg.get('follow-up') or ():
        if recursive_find(reply, li, url) is not None:
            return msg  # report the parent thread, not the nested reply
    return None
def find(li, url):
    """Look up ``url`` in the JSON archives of list ``li``.

    Every ``*.json`` file in ``ARCH/<li>`` is loaded and each entry of its
    'threads' array is searched with ``recursive_find``.

    Args:
        li: Name of the list; also the sub-directory of ``ARCH`` to scan.
        url: URL of the message to locate.

    Returns:
        The first non-None result of ``recursive_find``, or ``None`` when
        the directory does not exist (a warning is logged and printed) or
        no archive contains the URL.
    """
    archive_dir = os.path.join(ARCH, li)
    if not os.path.isdir(archive_dir):
        notice = "Invalid archive path: " + archive_dir
        logging.warning(notice)
        print(notice)
        return None
    for json_path in glob.glob(os.path.join(archive_dir, "*.json")):
        with open(json_path, encoding='utf-8') as fp:
            archive = json.load(fp)
        for thread in archive['threads']:
            hit = recursive_find(thread, li, url)
            if hit is not None:
                return hit
    return None
def recursive_urls(msg):
    """Collect the URL of ``msg`` and of every message nested below it.

    Args:
        msg: Message dict with a 'url' key and, optionally, a 'follow-up'
            list of reply dicts of the same shape.

    Returns:
        A list of URLs in depth-first order: the message itself first, then
        each reply followed by that reply's own follow-ups.
    """
    urls = [msg['url']]
    # A missing or null 'follow-up' means "no replies"; the dict is queried
    # directly instead of materialising its key list on every call.
    for reply in msg.get('follow-up') or ():
        urls.extend(recursive_urls(reply))
    return urls
def commit_selection(li, url, tag):
    """Record ``url`` of list ``li`` under ``tag`` in the selection file.

    Args:
        li: Name of the list the message belongs to.
        url: URL of the selected message.
        tag: Tag to file the message under; created (and announced on
            stdout) when it does not exist yet.

    Returns:
        ``False`` when ``url`` is already recorded under ``tag`` (the file
        is not rewritten), ``True`` after the entry has been appended and
        the selection file saved.
    """
    selection = load_selection()
    if tag not in selection:
        print("new tag: " + tag)
    entries = selection.setdefault(tag, [])
    # Duplicates are detected on the URL alone, whatever list they came from.
    if any(entry['url'] == url for entry in entries):
        return False
    entries.append({'list': li, 'url': url})
    with open(sel, 'w', encoding='utf-8') as f:
        json.dump(selection, f, ensure_ascii=False, indent=4)
    return True
def commit_dump(li, url, tag):
    """Select a message and append its whole thread to the selection dump.

    The URL is first recorded with ``commit_selection``; the thread that
    contains it is then looked up with ``find`` and appended under ``tag``
    in the dump file.

    Args:
        li: Name of the list the message belongs to.
        url: URL of the selected message.
        tag: Tag to file the message under.

    Returns:
        The URLs of every message of the committed thread (as returned by
        ``recursive_urls``), or ``None`` when the URL was already selected
        under ``tag`` or could not be found in the archives.

    Note:
        The selection file is written before the archive lookup, so a
        failed lookup leaves a selection entry without a dump counterpart.
    """
    if not commit_selection(li, url, tag):
        return None
    thread = find(li, url)  # flagged as the time-consuming step in the original
    if thread is None:
        return None
    # find() only tags the matching message; tag the returned thread as well
    # so entries look the same as those written by commit_from_selection().
    thread['list'] = li
    dump = load_selection_dump()
    dump.setdefault(tag, []).append(thread)
    with open(sel_dump, 'w+', encoding='utf-8') as fout:
        json.dump(dump, fout, ensure_ascii=False, indent=4)
    return recursive_urls(thread)
def commit_from_selection():
    """Rebuild the selection dump from scratch out of the selection file.

    For every tag of the selection, each recorded (list, url) pair is
    looked up with ``find``; the threads that are found are tagged with
    their list name and stored under the same tag. Entries that cannot be
    found are skipped. The result replaces the content of the dump file.
    """
    selection = load_selection()
    rebuilt = {}
    for tag, entries in selection.items():
        found = []
        rebuilt[tag] = found
        for entry in entries:
            thread = find(entry['list'], entry['url'])  # flagged as slow in the original
            if thread is None:
                continue
            thread['list'] = entry['list']
            found.append(thread)
    with open(sel_dump, 'w+', encoding='utf-8') as f:
        json.dump(rebuilt, f, ensure_ascii=False, indent=4)
def report():
    """Build a text summary of the selection file.

    Returns:
        A string starting with ``"Report: \\n"`` followed by one line per
        tag of the form ``<tag>: total (list: count / list: count / )``,
        where the counts are the number of entries per list under that tag.
    """
    selection = load_selection()
    chunks = ["Report: \n"]
    for tag, entries in selection.items():
        # Count entries per list, keeping lists in first-seen order.
        per_list = {}
        for entry in entries:
            name = entry['list']
            per_list[name] = per_list.get(name, 0) + 1
        chunks.append("<" + tag + ">: " + str(len(entries)) + " (")
        for name, count in per_list.items():
            chunks.append(name + ": " + str(count) + " / ")
        chunks.append(")\n")
    return "".join(chunks)
def recursive_format(msg):
    """Condense a message tree in place.

    For ``msg`` and every message under 'follow-up', the 'id', 'content'
    and 'content-type' keys are removed (they must be present), a 'len' key
    holding the length of the removed content is added, and the optional
    'to' and 'message-id' keys are dropped when present.

    Args:
        msg: Message dict to rewrite; modified in place.
    """
    del msg['id']
    body = msg['content']
    msg['len'] = len(body)
    del msg['content'], msg['content-type']
    for optional in ('to', 'message-id'):
        msg.pop(optional, None)
    for reply in msg.get('follow-up', ()):
        recursive_format(reply)
def format_selection():
    """Load the selection dump and condense every message it contains.

    Returns:
        The loaded dump after ``recursive_format`` has been applied to each
        message listed under each tag. The dump file itself is not rewritten.
    """
    dump = load_selection_dump()
    for threads in dump.values():
        for thread in threads:
            recursive_format(thread)
    return dump
def recursive_hashmap(msg, tag, hm):
    """Map the URL of ``msg`` and of all its follow-ups to ``tag``.

    Args:
        msg: Message dict with a 'url' key and an optional 'follow-up' list.
        tag: Value stored for every URL of the tree.
        hm: Dict updated in place; existing entries for the same URL are
            overwritten.
    """
    # Explicit stack instead of recursion; replies are pushed in reverse so
    # messages are still visited parent first, then replies in order.
    pending = [msg]
    while pending:
        current = pending.pop()
        hm[current['url']] = tag
        if 'follow-up' in current:
            pending.extend(reversed(current['follow-up']))
def hashmap():
    """Return a dict mapping every URL of the selection dump to its tag.

    Each message listed under a tag, together with all of its follow-ups,
    is registered through ``recursive_hashmap``.
    """
    url_to_tag = {}
    dump = load_selection_dump()
    for tag, threads in dump.items():
        for thread in threads:
            recursive_hashmap(thread, tag, url_to_tag)
    return url_to_tag
if __name__ == "__main__":
    # Run as a script: print the condensed selection dump as indented JSON
    # with sorted keys.
    condensed = format_selection()
    print(json.dumps(condensed, sort_keys=True, indent=4))
def reorder_selection_orphans(tag, threshold=0.1):
    """Attach stand-alone messages of ``tag`` to the best-matching thread.

    Messages of the dump listed under ``tag`` are split into threads (those
    with a 'follow-up' key) and orphans (those without). Each orphan is
    compared with every thread through ``strutil.cosine_dist`` on their
    'subject' values; when the highest score exceeds ``threshold`` the
    orphan is appended to that thread's 'follow-up' list and removed from
    the top level. The dump file is then rewritten.

    Args:
        tag: Tag whose messages are reorganised. Nothing happens (and the
            file is not rewritten) when the tag is absent from the dump.
        threshold: Minimum score, exclusive, for an orphan to be attached.
            NOTE(review): the code treats a higher ``cosine_dist`` value as
            a better match - confirm against ``strutil``.

    Note:
        On equal scores the first thread in dump order wins.
    """
    dump = load_selection_dump()
    if tag not in dump:
        return
    msgs = dump[tag]
    threads = [m for m in msgs if 'follow-up' in m]
    orphans = [m for m in msgs if 'follow-up' not in m]

    adopted = set()  # id() of orphans that were moved under a thread
    # Without any thread there is nothing to attach to; skipping the loop
    # avoids indexing into an empty candidate list.
    if threads:
        for orphan in orphans:
            subject = orphan['subject']
            # Only the single best candidate is needed, so pick the maximum
            # instead of re-sorting every thread for every orphan.
            best = max(
                threads,
                key=lambda t: strutil.cosine_dist(t['subject'], subject),
            )
            if strutil.cosine_dist(best['subject'], subject) > threshold:
                best['follow-up'].append(orphan)
                adopted.add(id(orphan))

    # Drop adopted orphans by identity rather than by deep dict comparison.
    dump[tag] = [m for m in msgs if id(m) not in adopted]
    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(dump, f, ensure_ascii=False, indent=4)