final stuff

This commit is contained in:
gauthiier
2020-01-12 12:16:10 +01:00
parent 50a99e74ee
commit 62ec88946a
13 changed files with 319 additions and 17360 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
+71 -11
View File
@@ -30,6 +30,21 @@ def sort_sel_dump(tag, sel_dump=sel_dump, sel_out=None):
with open(sel_dump, 'w') as fout: with open(sel_dump, 'w') as fout:
json.dump(d, fout, indent=4, ensure_ascii=False) json.dump(d, fout, indent=4, ensure_ascii=False)
def recursive_sort_by_date(msg):
    """Order the follow-ups of ``msg`` at every nesting level.

    The list stored under ``msg['follow-up']`` is replaced by a new list
    sorted with ``export.utils.parse_date_msg`` as the key, and the same is
    then done for each follow-up in turn.

    Args:
        msg: message mapping; the ``'follow-up'`` key is optional.

    Returns:
        None. ``msg`` and its nested follow-ups are modified in place.
    """
    followups = msg['follow-up'] if 'follow-up' in msg else None
    if not followups:
        # Key missing, or present but None/empty: nothing to sort and
        # nothing to descend into (sorted(None) would raise TypeError).
        return
    msg['follow-up'] = sorted(followups, key=lambda m: export.utils.parse_date_msg(m))
    for m in msg['follow-up']:
        recursive_sort_by_date(m)
def recursive_get_follow_up(msg):
    """Return every follow-up below ``msg`` as one flat list.

    For each direct follow-up, its own descendants are collected first; the
    direct follow-ups of ``msg`` are appended after all of them. ``msg``
    itself is not part of the result and is not modified.

    Args:
        msg: message mapping; the ``'follow-up'`` key is optional.

    Returns:
        A new list of the nested follow-up messages (the same objects, not
        copies); empty when there are none.
    """
    children = msg['follow-up'] if 'follow-up' in msg else None
    if not children:
        # Key missing, or present but None/empty: no descendants to report
        # (iterating None would raise TypeError).
        return []
    collected = []
    for child in children:
        collected += recursive_get_follow_up(child)
    # Direct follow-ups go last, after every deeper level.
    collected += children
    return collected
def fix_missing_content(xml_in, xml_out): def fix_missing_content(xml_in, xml_out):
tree = et.parse(xml_in) tree = et.parse(xml_in)
root = tree.getroot() root = tree.getroot()
@@ -110,19 +125,18 @@ def to_listserv(li, msg):
def emit_mail_xml(msg, li, thread_nbr, msg_nbr): def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
# print(msg['date'] + " - " + msg['url'])
print(msg['date'])
global nn, hashes global nn, hashes
nn += 1 nn += 1
h = hash(msg) # patch h = hash(msg) # patch
if h in hashes: if h in hashes:
logging.warning("Duplicate: " + msg['from'] + " - " + msg['subject'] + " - " + msg['date'] + ". Skipping...") #logging.warning("Duplicate: " + msg['from'] + " - " + msg['subject'] + " - " + msg['date'] + ". Skipping...")
return '' return ''
else: else:
hashes.append(h) hashes.append(h)
print(msg['date'] + " - " + msg['subject'])
nbr = make_xml_element("nbr", str(thread_nbr) + "." + str(msg_nbr)) + "\n" nbr = make_xml_element("nbr", str(thread_nbr) + "." + str(msg_nbr)) + "\n"
subject = make_xml_element("subject", msg['subject']) + "\n" subject = make_xml_element("subject", msg['subject']) + "\n"
@@ -176,13 +190,38 @@ def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
# content = et.SubElement(mail, 'content') # content = et.SubElement(mail, 'content')
# content.text = e.reply # content.text = e.reply
# recursuve "follow-up" # # recursuve "follow-up"
if 'follow-up' in msg: # if 'follow-up' in msg:
followups = export.utils.index_follow_up(msg)
followups.sort(key=lambda tup: tup[0])
for d, f in followups: # all_follow = recursive_get_follow_up(msg)
msg_nbr += 1 # print(str(len(all_follow)))
mail += emit_mail_xml(f, li, thread_nbr, msg_nbr) # all_follow = sorted(all_follow, key=lambda m: export.utils.parse_date_msg(m))
# for f in all_follow:
# mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
# recursive_sort_by_date(msg)
# for f in msg['follow-up']:
# mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
# followups = export.utils.index_follow_up(msg)
# followups.sort(key=lambda tup: tup[0]) # sort by date...?
# for d, f in followups:
# msg_nbr += 1
# mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
return mail return mail
@@ -213,6 +252,27 @@ def export_single_tag(t, sel, fout):
chapter_mails += emit_mail_xml(m, m['list'], thread_nbr, 0) chapter_mails += emit_mail_xml(m, m['list'], thread_nbr, 0)
thread_nbr += 1 thread_nbr += 1
msg_nbr = 0
# recursive "follow-up"
if 'follow-up' in m:
print('follow-up')
all_follow = recursive_get_follow_up(m)
print(str(len(all_follow)))
all_follow = sorted(all_follow, key=lambda m: export.utils.parse_date_msg(m))
for f in all_follow:
chapter_mails += emit_mail_xml(f, m['list'], thread_nbr, msg_nbr)
msg_nbr += 1
chapter_mails += "</mails>\n" chapter_mails += "</mails>\n"
chapter = "<chapter>\n" + chapter_title + chapter_desc + chapter_mails + "</chapter>" chapter = "<chapter>\n" + chapter_title + chapter_desc + chapter_mails + "</chapter>"
+2
View File
@@ -193,6 +193,8 @@ def recursive_urls(msg):
# <li><a href="' + h.url+ '" target="_blank">' + h.subject + '</a> -- <i>' + h.author_name + '</i> # <li><a href="' + h.url+ '" target="_blank">' + h.subject + '</a> -- <i>' + h.author_name + '</i>
def recursive_info(msg, keep_hierachy=False): def recursive_info(msg, keep_hierachy=False):
print(msg['url'])
r = [{'url': msg['url'], 'subject': msg['subject'], 'author_name': msg['author_name']}] r = [{'url': msg['url'], 'subject': msg['subject'], 'author_name': msg['author_name']}]
if keep_hierachy: if keep_hierachy:
File diff suppressed because one or more lines are too long
+8 -20
View File
@@ -567,6 +567,14 @@
{ {
"list": "nettime_l", "list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0206/msg00103.html" "url": "https://nettime.org/Lists-Archives/nettime-l-0206/msg00103.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0206/msg00037.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0206/msg00041.html"
} }
], ],
"desc": "..." "desc": "..."
@@ -1009,10 +1017,6 @@
{ {
"list": "nettime_l", "list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-1007/msg00044.html" "url": "https://nettime.org/Lists-Archives/nettime-l-1007/msg00044.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-1210/msg00013.html"
} }
], ],
"desc": "..." "desc": "..."
@@ -2066,14 +2070,6 @@
"list": "nettime_l", "list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0009/msg00209.html" "url": "https://nettime.org/Lists-Archives/nettime-l-0009/msg00209.html"
}, },
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00202.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00261.html"
},
{ {
"list": "nettime_l", "list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-9703/msg00094.html" "url": "https://nettime.org/Lists-Archives/nettime-l-9703/msg00094.html"
@@ -2086,14 +2082,6 @@
"list": "nettime_l", "list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00159.html" "url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00159.html"
}, },
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00207.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00211.html"
},
{ {
"list": "nettime_l", "list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00250.html" "url": "https://nettime.org/Lists-Archives/nettime-l-0102/msg00250.html"
+47
View File
@@ -0,0 +1,47 @@
<html>
<head>
<title>Times of Nettime</title>
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='c3.min.css') }}">
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='lestyle.css') }}">
<!-- <script type=text/javascript src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.2.1/jquery.min.js"></script>
-->
<script type="text/javascript" src="{{ url_for('static',filename='jquery-3.2.1.min.js') }}" charset="utf-8"></script>
<script type="text/javascript" src="{{ url_for('static',filename='d3.min.js') }}" charset="utf-8"></script>
<script type="text/javascript" src="{{ url_for('static',filename='c3.min.js') }}"></script>
<script type=text/javascript src="{{ url_for('static',filename='search.js') }}"></script>
</head>
<body>
<form action="/search" method="get" id="search">
<label>keyword: </label><input type="search" name="keyword">
<select form="search" name="list">
<option value="all">all</option>
{% for a in archives %}
<option value="{{ a }}">{{ a }}</option>
{% endfor %}
</select>
<select form="search" name="field">
{% for a in fields %}
<option value="{{ a }}">{{ a }}</option>
{% endfor %}
</select>
<input type="submit" value="search" id="submit">
<input type="button" value=" ? " id="info">
<div id="loading">Loading...</div>
</form>
<div id="info-search" style="display: none">
<table><tbody><tr><th>Operator</th><th> </th></tr>
<tr><td>+</td><td>The word is mandatory in all text returned.</td></tr>
<tr><td>-</td><td>The word cannot appear in any text returned.</td></tr>
<tr><td>&lt;</td><td>The word that follows has a lower relevance than other words, although text containing it will still match.</td></tr>
<tr><td>&gt;</td><td>The word that follows has a higher relevance than other words.</td></tr>
<tr><td>()</td><td>Used to group words into subexpressions.</td></tr>
<tr><td>~</td><td>The word following contributes negatively to the relevance of the text (which is different to the '-' operator, which specifically excludes the word, or the '&lt;' operator, which still causes the word to contribute positively to the relevance of the text).</td></tr>
<tr><td>*</td><td>The wildcard, indicating zero or more characters. It can only appear at the end of a word.</td></tr>
<tr><td>"</td><td>Anything enclosed in the double quotes is taken as a whole (so you can match phrases, for example).</td></tr>
</tbody></table>
</div>
<div id="graph"></div>
<div id="export"><a id="link" href="">export</a></div>
</body>
</html>