semi final of the book

This commit is contained in:
gauthiier
2020-01-02 22:49:07 +01:00
parent e5786987e4
commit b046f6a211
5 changed files with 5406 additions and 2261 deletions
+72 -1
View File
@@ -19,6 +19,51 @@ def export_generate_path(tag):
now = datetime.datetime.now()
return os.path.join(config.export['path'], tag + "_[now].xml")
def sort_sel_dump(tag, sel_dump=sel_dump, sel_out=None):
    """Sort the ``lists`` entries of one tag in a JSON selection dump by date.

    The dump is read from ``sel_dump``, the list stored under
    ``d[tag]['lists']`` is sorted with ``export.utils.parse_date_msg`` as the
    key, and the whole document is written back as indented JSON.

    Args:
        tag: Top-level key of the dump whose ``lists`` entry is sorted.
        sel_dump: Path of the JSON dump to read. Defaults to the module-level
            ``sel_dump`` value captured when this function is defined.
        sel_out: Path to write the sorted dump to. When ``None`` (the
            default) the input file ``sel_dump`` is overwritten in place.

    Raises:
        KeyError: If ``tag`` or its ``lists`` entry is missing from the dump.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    d[tag]['lists'] = sorted(d[tag]['lists'], key=export.utils.parse_date_msg)

    # Previously `sel_out` was accepted but ignored; honour it when supplied
    # and fall back to the historical in-place overwrite otherwise.
    target = sel_dump if sel_out is None else sel_out
    with open(target, 'w') as fout:
        json.dump(d, fout, indent=4, ensure_ascii=False)
def fix_missing_content(xml_in, xml_out):
    """Fill empty ``<content>`` elements of an XML export from the JSON dump.

    The document at ``xml_in`` is parsed and the text of its root-level
    ``<title>`` is used as the tag to look up in the JSON file named by the
    module-level ``sel_dump``. Every element that has an empty ``<content>``
    child is matched, by the text of its ``<date>`` child, against the
    messages under ``d[tag]["lists"]`` (searching follow-ups recursively);
    on a match the ``<content>`` text is replaced with the message's
    ``'content'`` value. If several messages match, the last one wins.

    The resulting tree is always serialised to ``xml_out``, whether or not
    anything had to be filled in.

    Args:
        xml_in: Path of the XML file to read.
        xml_out: Path of the XML file to write.
    """
    tree = et.parse(xml_in)
    root = tree.getroot()
    tag = root.find('title').text

    content_less = root.findall('.//*[content=""]/.')

    # The original guard tested `len(content_less) < 0`, which can never be
    # true, so the dump was loaded even when there was nothing to repair.
    # Only touch the dump when at least one element needs content; the output
    # file is still written in every case, as before.
    if content_less:
        with open(sel_dump) as fin:
            d = json.load(fin)

        for m in content_less:
            date_str = m.find('date').text
            print(date_str)
            for msg in d[tag]["lists"]:
                ml = find_msg_by_date_recursive(msg, date_str)
                if ml is not None:
                    m.find('content').text = ml['content']

    with open(xml_out, "w") as fout:
        fout.write(et.tostring(root).decode('utf-8', 'ignore'))
def find_msg_by_date_recursive(msg, date_str):
    """Return the first message in a thread whose ``'date'`` equals ``date_str``.

    ``msg`` is checked first; otherwise each entry of its optional
    ``'follow-up'`` collection is searched depth-first, in order.

    Args:
        msg: Mapping with a ``'date'`` key and, optionally, a ``'follow-up'``
            iterable of mappings of the same shape.
        date_str: Date value to compare against each message's ``'date'``.

    Returns:
        The matching message mapping, or ``None`` when nothing matches.
    """
    if msg['date'] == date_str:
        return msg

    replies = msg['follow-up'] if 'follow-up' in msg else ()
    # Lazy generator: sub-threads are only searched until the first hit.
    hits = (find_msg_by_date_recursive(reply, date_str) for reply in replies)
    return next((hit for hit in hits if hit is not None), None)
'''
xml export
'''
@@ -57,12 +102,17 @@ def to_listserv(li, msg):
elif d < datetime.date(2007, 7, 19):
return 'nettime-l@bbs.thing.net'
return 'nettime-l@kein.org'
elif li == 'oldboys':
return 'oldboys@lists.ccc.de'
else:
logging.warning("no listserv to...")
return 'n/a'
def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
# print(msg['date'] + " - " + msg['url'])
print(msg['date'])
global nn, hashes
nn += 1
@@ -96,7 +146,28 @@ def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
# content_stripped = re.sub(r'(?<!\n)\n(?!\n)', ' ', content_stripped)
# # content_stripped = re.sub(r'\n[ ]{2,}\w', '\n\n', content_stripped)
content_stripped = e.reply
# content_stripped = e.reply
content_stripped = msg['content']
content = make_xml_element("content", content_stripped) + "\n"
+473 -12
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+17 -21
View File
@@ -1089,10 +1089,6 @@
"list": "nettime_bold",
"url": "https://nettime.org/Lists-Archives/nettime-bold-0203/msg00522.html"
},
{
"list": "nettime_bold",
"url": "https://nettime.org/Lists-Archives/nettime-bold-0112/msg00157.html"
},
{
"list": "nettime_bold",
"url": "https://nettime.org/Lists-Archives/nettime-bold-0105/msg00116.html"
@@ -2011,23 +2007,6 @@
],
"desc": "..."
},
"Art (critique)": {
"lists": [
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0807/msg00081.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0905/msg00038.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-1210/msg00020.html"
}
],
"desc": "..."
},
"FLOSS": {
"lists": [
{
@@ -2178,5 +2157,22 @@
}
],
"desc": "..."
},
"Critique Art Critique": {
"lists": [
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0807/msg00081.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-0905/msg00038.html"
},
{
"list": "nettime_l",
"url": "https://nettime.org/Lists-Archives/nettime-l-1210/msg00020.html"
}
],
"desc": "..."
}
}