21.dec.2019 a

This commit is contained in:
gauthiier 2019-12-21 14:13:16 +01:00
parent 09aca9fd25
commit 74fb0d6f0b
8 changed files with 3862 additions and 2328 deletions

View File

@ -1,20 +1,39 @@
import os, json, glob, logging
from selection import strutil
# Directory of the list archives; lists() enumerates its entries.
ARCH = "archives/"
# Directory that holds the two selection files defined below.
EXP = "selection/"
# Path of the selection file, parsed as JSON by load_selection().
sel = os.path.join(EXP, "tm-selection.js")
# Path of the selection dump, parsed as JSON by load_selection_dump().
sel_dump = os.path.join(EXP, "tm-selection-dump.js")
with open(sel, encoding='utf-8') as f:
d = json.load(f)
def load_selection():
    """Read the selection file (``sel``) and return its parsed JSON content."""
    with open(sel, encoding='utf-8') as handle:
        return json.load(handle)
def load_selection_dump():
    """Read the selection dump (``sel_dump``) and return its parsed JSON content."""
    with open(sel_dump, encoding='utf-8') as handle:
        return json.load(handle)
def lists():
    """Return the names of the entries found in the ``ARCH`` directory.

    ``sel`` is a plain path string, so the former ``sel.ARCH`` lookup could
    only raise ``AttributeError`` and hid the working statement behind it;
    the module-level ``ARCH`` constant is the directory that gets listed.

    Raises:
        OSError: if ``ARCH`` does not exist or cannot be read.
    """
    return os.listdir(ARCH)
def tags():
    """Reload the selection and return its top-level keys as a list.

    The freshly loaded selection is also stored in the module-level ``d``.
    """
    global d
    d = load_selection()
    return [tag for tag in d.keys()]
def recursive_find(msg, li, url):
    """Search a message and its nested follow-ups for a given URL.

    Args:
        msg: message dict with a 'url' key and, optionally, a 'follow-up'
            list of message dicts of the same shape.
        li: list name stored under the 'list' key of the matching message.
        url: URL of the message to look for.

    Returns:
        ``msg`` itself when it, or any message nested below it, carries the
        requested URL, so a call on a top-level message yields the whole
        enclosing thread rather than the matching reply. ``None`` when
        nothing matches. Only the message whose URL matches is tagged with
        'list'; its ancestors are returned untouched.
    """
    if msg['url'] == url:
        msg['list'] = li  # tag the matching message with its list name
        return msg
    # Membership is tested on the dict itself; copying its keys is not needed.
    if 'follow-up' in msg:
        for reply in msg['follow-up']:
            if recursive_find(reply, li, url) is not None:
                return msg  # hand back the parent so the caller gets the thread
    return None
def find(li, url):
@ -30,9 +49,12 @@ def find(li, url):
for f in dir_files:
with open(f, encoding='utf-8') as fp:
dj = json.load(fp)
for t in dj['threads']:
if t['url'] == url: # one level..... not recursive
return t
for msg in dj['threads']:
f = recursive_find(msg, li, url)
if f is not None:
return f
return None
def recursive_urls(msg):
@ -44,6 +66,8 @@ def recursive_urls(msg):
def commit_selection(li, url, tag):
d = load_selection()
if tag not in list(d.keys()):
print("new tag: " + tag)
d[tag] = []
@ -67,8 +91,7 @@ def commit_dump(li, url, tag):
m = find(li, url) # <--- time
if m is not None:
with open(sel_dump, encoding='utf-8') as f:
dump = json.load(f)
dump = load_selection_dump()
if tag not in list(dump.keys()):
dump[tag] = []
@ -83,12 +106,10 @@ def commit_dump(li, url, tag):
return None
def commit_from_selection():
dump = {}
with open(sel, encoding='utf-8') as f:
d = json.load(f)
d = load_selection()
for k, v in d.items():
dump[k] = []
@ -103,6 +124,8 @@ def commit_from_selection():
def report():
d = load_selection()
re = "Report: \n"
for k, v in d.items():
lre = {}
@ -132,8 +155,7 @@ def recursive_format(msg):
def format_selection():
with open(sel_dump, encoding='utf-8') as f:
d = json.load(f)
d = load_selection_dump()
for k, v in d.items():
for i in v:
@ -148,8 +170,7 @@ def recursive_hashmap(msg, tag, hm):
recursive_hashmap(i, tag, hm)
def hashmap():
with open(sel_dump, encoding='utf-8') as f:
d = json.load(f)
d = load_selection_dump()
hm = {}
for k, v in d.items():
for i in v:
@ -160,4 +181,58 @@ if __name__ == "__main__":
d = format_selection()
print(json.dumps(d, indent=4, sort_keys=True))
def reorder_selection_orphans(tag):
    """Attach thread-less messages of a tag to the thread they resemble most.

    The messages stored under ``tag`` in the selection dump are split into
    threads (they have a 'follow-up' key) and orphans (they do not). Each
    orphan whose subject scores above 0.1 against the best-matching thread
    subject (``strutil.cosine_dist``) is appended to that thread's
    'follow-up' list and removed from the top level. The dump file is then
    rewritten.

    Args:
        tag: key of the selection dump to reorganise. Nothing is written
            when the tag is not present in the dump.
    """
    d = load_selection_dump()
    if tag not in d:
        return

    msgs = d[tag]
    threads = [m for m in msgs if 'follow-up' in m]
    orphans = [m for m in msgs if 'follow-up' not in m]

    # With no thread at all there is nothing to attach an orphan to, and
    # indexing threads[0] would raise IndexError.
    if threads:
        for o in orphans:
            subject = o['subject']
            # Best match first; the in-place sort keeps the order left by the
            # previous orphan as the tie-breaker between equal scores.
            threads.sort(
                key=lambda x: strutil.cosine_dist(x['subject'], subject),
                reverse=True,
            )
            best = threads[0]
            if strutil.cosine_dist(best['subject'], subject) > 0.1:
                # best is the very dict held in msgs, so appending here
                # updates the dump structure directly.
                best['follow-up'].append(o)
                msgs.remove(o)

    # msgs is d[tag] itself and was mutated in place.
    with open(sel_dump, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=4)

26
selection/strutil.py Normal file
View File

@ -0,0 +1,26 @@
import re, math
from collections import Counter
# Matches one run of word characters; text_to_vector() uses it to split text into words.
WORD = re.compile(r'\w+')
def get_cosine(vec1, vec2):
    """Return the cosine similarity of two sparse term-weight vectors.

    Args:
        vec1: mapping from term to numeric weight (for instance a ``Counter``).
        vec2: mapping of the same kind.

    Returns:
        float: the dot product over the shared terms divided by the product
        of both Euclidean norms, or ``0.0`` when that product is zero (for
        instance when either vector is empty).
    """
    shared_terms = vec1.keys() & vec2.keys()
    numerator = sum(vec1[term] * vec2[term] for term in shared_terms)

    norm1 = math.sqrt(sum(weight ** 2 for weight in vec1.values()))
    norm2 = math.sqrt(sum(weight ** 2 for weight in vec2.values()))
    denominator = norm1 * norm2

    # A zero norm would otherwise lead to a division by zero.
    if not denominator:
        return 0.0
    return numerator / denominator
def text_to_vector(text):
    """Count how often each word (a match of ``WORD``) occurs in ``text``."""
    return Counter(WORD.findall(text))
def cosine_dist(str1, str2):
    """Return the cosine similarity between the word counts of two strings.

    Despite the name, the value comes straight from ``get_cosine``, so a
    higher result means the two strings share more of their words.
    """
    return get_cosine(text_to_vector(str1), text_to_vector(str2))

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
{
"cyber": [
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9706/msg00111.html"
}
],
@ -40,63 +40,63 @@
"url": "https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=ind1406&L=new-media-curating&F=&S=&P=54012"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9708/msg00010.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9708/msg00009.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9703/msg00038.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9703/msg00060.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9703/msg00096.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9610/msg00029.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9705/msg00053.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9705/msg00003.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9707/msg00014.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00109.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00117.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00084.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00162.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00153.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00202.html"
},
{
@ -112,13 +112,13 @@
"url": "http://lists.cofa.unsw.edu.au/pipermail/empyre/2016-September/009253.html"
},
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0009/msg00209.html"
}
],
"new media art": [
{
"list": "nettime-l",
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0905/msg00038.html"
}
]

View File

@ -1,4 +1,7 @@
from www import app
import logging
# Configure the root logger to emit records from DEBUG level upwards.
logging.basicConfig(level=logging.DEBUG)

if __name__ == "__main__":
    # Start the app in debug mode with the reloader switched off.
    app.run(use_reloader=False, debug=True)

View File

@ -72,7 +72,7 @@ def searh():
k_arg = request.args.get('keyword')
l_arg = request.args.get('list')
f_arg = request.args.get('field')
f_arg = request.args.get('field')
if k_arg is None or k_arg.strip() == '':
return "no keyword..."
@ -88,7 +88,8 @@ def searh():
URL = LISTSERVS_URL + '/search?' + val
logging.info("search keyword = " + k_arg)
logging.debug(val)
r = urllib.request.urlopen(LISTSERVS_URL + '/search?' + val)
data = json.loads(r.read().decode('utf8'))

4
www/static/jquery-3.2.1.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -3,8 +3,11 @@
<title>Times of Nettime</title>
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='c3.min.css') }}">
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='lestyle.css') }}">
<script type=text/javascript src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
<script type="text/javascript" src="{{ url_for('static',filename='d3.min.js') }}" charset="utf-8"></script>
<!-- <script type=text/javascript src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
-->
<script type="text/javascript" src="{{ url_for('static',filename='jquery-3.2.1.min.js') }}" charset="utf-8"></script>
<script type="text/javascript" src="{{ url_for('static',filename='d3.min.js') }}" charset="utf-8"></script>
<script type="text/javascript" src="{{ url_for('static',filename='c3.min.js') }}"></script>
<script type=text/javascript src="{{ url_for('static',filename='search.js') }}"></script>
</head>