"""Maintain a tag-based selection of archived message threads.

Two JSON files (stored with a ``.js`` extension) are kept under ``EXP``:

* ``sel`` -- the selection index: maps each tag to a list of
  ``{'list': ..., 'url': ...}`` entries.
* ``sel_dump`` -- the selection dump: maps each tag to the list of full
  thread dicts that were located in the archives under ``ARCH``.
"""

import glob
import json
import logging
import os
from collections import Counter

from selection import strutil

ARCH = "archives/"
EXP = "selection/"
sel = os.path.join(EXP, "tm-selection.js")
sel_dump = os.path.join(EXP, "tm-selection-dump.js")


def load_selection():
    """Return the selection index parsed from the ``sel`` file."""
    with open(sel, encoding='utf-8') as f:
        return json.load(f)


def load_selection_dump():
    """Return the selection dump parsed from the ``sel_dump`` file."""
    with open(sel_dump, encoding='utf-8') as f:
        return json.load(f)


def lists():
    """Return the names of the entries found directly under ``ARCH``."""
    return os.listdir(ARCH)


def tags():
    """Return the tags (top-level keys) of the selection index as a list."""
    return list(load_selection())


def recursive_find(msg, li, url):
    """Search ``msg`` and its nested ``'follow-up'`` messages for ``url``.

    The message whose ``'url'`` equals ``url`` is tagged in place with
    ``msg['list'] = li``.

    Returns:
        ``msg`` itself when it, or any message nested below it, matches --
        so a top-level call yields the enclosing thread rather than the
        matching follow-up. ``None`` when nothing matches.
    """
    if msg['url'] == url:
        msg['list'] = li  # tag the matching message with its list name
        return msg
    for child in msg.get('follow-up', ()):
        if recursive_find(child, li, url) is not None:
            return msg  # propagate the parent so callers get the thread
    return None


def find(li, url):
    """Locate the thread containing ``url`` in the archive of list ``li``.

    Every ``*.json`` file in ``ARCH/<li>`` is loaded in turn and its
    ``'threads'`` are searched, so this can be slow on large archives.

    Returns:
        The first top-level thread dict containing the URL, or ``None`` if
        the archive directory does not exist or no thread matches.
    """
    d = os.path.join(ARCH, li)
    if not os.path.isdir(d):
        logging.warning("Invalid archive path: %s", d)
        print("Invalid archive path: " + d)
        return None

    for path in glob.glob(os.path.join(d, "*.json")):
        with open(path, encoding='utf-8') as fp:
            archive = json.load(fp)
        for msg in archive['threads']:
            found = recursive_find(msg, li, url)
            if found is not None:
                return found
    return None


def recursive_urls(msg):
    """Return the URLs of ``msg`` and all nested follow-ups, depth-first."""
    urls = [msg['url']]
    for child in msg.get('follow-up', ()):
        urls += recursive_urls(child)
    return urls


def commit_selection(li, url, tag):
    """Record ``(li, url)`` under ``tag`` in the selection index.

    A tag that does not exist yet is created (and announced on stdout).

    Returns:
        ``False`` if ``url`` is already recorded under ``tag`` (the file is
        left untouched), ``True`` after the index has been rewritten.
    """
    d = load_selection()
    if tag not in d:
        print("new tag: " + tag)
        d[tag] = []

    if any(entry['url'] == url for entry in d[tag]):
        return False

    d[tag].append({'list': li, 'url': url})
    with open(sel, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)
    return True


def commit_dump(li, url, tag):
    """Record a selection and append its thread to the selection dump.

    Returns:
        The list of URLs contained in the dumped thread, or ``None`` when
        the URL was already selected under ``tag`` or no thread was found
        in the archives. Note that in the latter case the selection index
        has already been updated by ``commit_selection``.
    """
    if not commit_selection(li, url, tag):
        return None

    m = find(li, url)  # slow: scans the archive files of the list
    if m is None:
        return None

    # recursive_find only tags the matching message; tag the thread root as
    # well so the entry has the same shape as those written by
    # commit_from_selection.
    m['list'] = li

    dump = load_selection_dump()
    dump.setdefault(tag, []).append(m)
    with open(sel_dump, 'w', encoding='utf-8') as fout:
        json.dump(dump, fout, ensure_ascii=False, indent=4)
    return recursive_urls(m)


def commit_from_selection():
    """Rebuild the whole selection dump from the selection index.

    Every entry of every tag is looked up in the archives; entries that
    cannot be found are silently left out of the dump.
    """
    dump = {}
    for tag, entries in load_selection().items():
        dump[tag] = []
        for entry in entries:
            m = find(entry['list'], entry['url'])  # slow: scans archive files
            if m is not None:
                m['list'] = entry['list']
                dump[tag].append(m)

    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(dump, f, ensure_ascii=False, indent=4)


def report():
    """Return a text summary of the selection index.

    One line per tag: the number of entries followed by a per-list
    breakdown, e.g. ``<tag>: 3 (listA: 2 / listB: 1 / )``.
    """
    parts = ["Report: \n"]
    for tag, entries in load_selection().items():
        # Counter keeps first-seen order, matching the order lists appear in.
        per_list = Counter(entry['list'] for entry in entries)
        breakdown = "".join(
            name + ": " + str(count) + " / "
            for name, count in per_list.items()
        )
        parts.append("<" + tag + ">: " + str(len(entries)) + " (" + breakdown + ")\n")
    return "".join(parts)


def recursive_format(msg):
    """Strip bulky fields from ``msg`` and its follow-ups, in place.

    ``'id'``, ``'content'`` and ``'content-type'`` are removed (a missing
    key raises ``KeyError``); ``'to'`` and ``'message-id'`` are removed when
    present. The length of the removed content is kept under ``'len'``.
    """
    msg.pop('id')
    msg['len'] = len(msg['content'])
    msg.pop('content')
    msg.pop('content-type')
    msg.pop('to', None)
    msg.pop('message-id', None)
    for child in msg.get('follow-up', ()):
        recursive_format(child)


def format_selection():
    """Load the selection dump and return it with every thread formatted.

    The dump file itself is not modified.
    """
    d = load_selection_dump()
    for threads in d.values():
        for thread in threads:
            recursive_format(thread)
    return d


def recursive_hashmap(msg, tag, hm):
    """Map the URL of ``msg`` and of each nested follow-up to ``tag`` in ``hm``."""
    hm[msg['url']] = tag
    for child in msg.get('follow-up', ()):
        recursive_hashmap(child, tag, hm)


def hashmap():
    """Return a dict mapping every URL in the selection dump to its tag.

    A URL present under several tags keeps the tag encountered last.
    """
    hm = {}
    for tag, threads in load_selection_dump().items():
        for thread in threads:
            recursive_hashmap(thread, tag, hm)
    return hm


def reorder_selection_orphans(tag):
    """Attach orphan messages of ``tag`` to the thread with the closest subject.

    Messages without a ``'follow-up'`` key are treated as orphans. Each one
    is compared against the subjects of the messages that do have
    follow-ups using ``strutil.cosine_dist``; the highest-scoring thread
    adopts the orphan when its score exceeds 0.1 (so a higher value is
    treated as a closer match -- presumably a similarity measure; confirm
    against ``strutil``). The dump file is then rewritten.

    Does nothing when ``tag`` is absent from the dump.
    """
    d = load_selection_dump()
    if tag not in d:
        return

    msgs = d[tag]
    threads = [m for m in msgs if 'follow-up' in m]
    orphans = [m for m in msgs if 'follow-up' not in m]

    # Without any thread there is nothing to attach orphans to; indexing
    # threads[0] would raise IndexError.
    if threads:
        for o in orphans:
            subject = o['subject']
            threads.sort(
                key=lambda x: strutil.cosine_dist(x['subject'], subject),
                reverse=True,
            )
            best = threads[0]
            if strutil.cosine_dist(best['subject'], subject) > 0.1:
                best['follow-up'].append(o)
                msgs.remove(o)

    d[tag] = msgs
    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    d = format_selection()
    print(json.dumps(d, indent=4, sort_keys=True))