final stuff

This commit is contained in:
gauthiier
2020-01-12 12:16:10 +01:00
parent 50a99e74ee
commit 62ec88946a
13 changed files with 319 additions and 17360 deletions
+73 -13
View File
@@ -30,6 +30,21 @@ def sort_sel_dump(tag, sel_dump=sel_dump, sel_out=None):
with open(sel_dump, 'w') as fout:
json.dump(d, fout, indent=4, ensure_ascii=False)
def recursive_sort_by_date(msg, key=None):
    """Sort the 'follow-up' replies of *msg* by date, in place, at every depth.

    The 'follow-up' list of *msg* is replaced by a date-sorted copy, then
    the same is done for each reply's own 'follow-up' list.  Messages
    without a 'follow-up' entry are left untouched.

    Args:
        msg: message dict, optionally holding a list of reply dicts under
            the 'follow-up' key.
        key: optional callable mapping a message to its date.  Defaults to
            ``export.utils.parse_date_msg``.

    Returns:
        None; *msg* is modified in place.
    """
    if 'follow-up' not in msg:
        return

    if key is None:
        key = export.utils.parse_date_msg

    def sort_key(m):
        d = key(m)
        # The date extractor may yield None for an unparsable date.  The
        # leading flag sorts such messages last and keeps None from ever
        # being compared against a real date (which raises TypeError).
        return (d is None, d)

    msg['follow-up'] = sorted(msg['follow-up'], key=sort_key)
    for m in msg['follow-up']:
        recursive_sort_by_date(m, key)
def recursive_get_follow_up(msg):
    """Return every reply nested under *msg* as one flat list.

    For each direct reply, its own nested replies are collected first;
    the direct replies of *msg* are appended after all of them.  A message
    without a 'follow-up' entry yields an empty list.  The returned list
    is always a new object; the message dicts inside it are not copied.
    """
    if 'follow-up' not in msg:
        return []

    replies = msg['follow-up']
    collected = []
    for reply in replies:
        # Deeper levels go in before the level that contains them.
        collected.extend(recursive_get_follow_up(reply))
    collected.extend(replies)
    return collected
def fix_missing_content(xml_in, xml_out):
tree = et.parse(xml_in)
root = tree.getroot()
@@ -108,21 +123,20 @@ def to_listserv(li, msg):
logging.warning("no listserv to...")
return 'n/a'
def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
# print(msg['date'] + " - " + msg['url'])
print(msg['date'])
def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
global nn, hashes
nn += 1
h = hash(msg) # patch
if h in hashes:
logging.warning("Duplicate: " + msg['from'] + " - " + msg['subject'] + " - " + msg['date'] + ". Skipping...")
#logging.warning("Duplicate: " + msg['from'] + " - " + msg['subject'] + " - " + msg['date'] + ". Skipping...")
return ''
else:
hashes.append(h)
print(msg['date'] + " - " + msg['subject'])
nbr = make_xml_element("nbr", str(thread_nbr) + "." + str(msg_nbr)) + "\n"
subject = make_xml_element("subject", msg['subject']) + "\n"
@@ -176,13 +190,38 @@ def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
# content = et.SubElement(mail, 'content')
# content.text = e.reply
# recursuve "follow-up"
if 'follow-up' in msg:
followups = export.utils.index_follow_up(msg)
followups.sort(key=lambda tup: tup[0])
for d, f in followups:
msg_nbr += 1
mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
# # recursive "follow-up"
# if 'follow-up' in msg:
# all_follow = recursive_get_follow_up(msg)
# print(str(len(all_follow)))
# all_follow = sorted(all_follow, key=lambda m: export.utils.parse_date_msg(m))
# for f in all_follow:
# mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
# recursive_sort_by_date(msg)
# for f in msg['follow-up']:
# mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
# followups = export.utils.index_follow_up(msg)
# followups.sort(key=lambda tup: tup[0]) # sort by date...?
# for d, f in followups:
# msg_nbr += 1
# mail += emit_mail_xml(f, li, thread_nbr, msg_nbr)
return mail
@@ -209,10 +248,31 @@ def export_single_tag(t, sel, fout):
hashes = []
thread_nbr = 0
for m in ch['lists']:
for m in ch['lists']:
chapter_mails += emit_mail_xml(m, m['list'], thread_nbr, 0)
thread_nbr += 1
msg_nbr = 0
# recursive "follow-up"
if 'follow-up' in m:
print('follow-up')
all_follow = recursive_get_follow_up(m)
print(str(len(all_follow)))
all_follow = sorted(all_follow, key=lambda m: export.utils.parse_date_msg(m))
for f in all_follow:
chapter_mails += emit_mail_xml(f, m['list'], thread_nbr, msg_nbr)
msg_nbr += 1
chapter_mails += "</mails>\n"
chapter = "<chapter>\n" + chapter_title + chapter_desc + chapter_mails + "</chapter>"
+1 -1
View File
@@ -46,7 +46,7 @@ def parse_date_msg(msg):
# recursive
def index_follow_up(msg):
r = []
if 'follow-up' in msg:
if 'follow-up' in msg:
for m in msg['follow-up']:
d = parse_date_msg(m)
if d is None: