List_server_busy/export/exportxml.py

370 lines
8.3 KiB
Python
Raw Normal View History

2019-12-27 15:30:07 +01:00
import datetime
import glob
import hashlib
import json
import logging
import os
import re
import xml.etree.ElementTree as et
from xml.sax.saxutils import unescape, escape

import config
import export.utils
from export import emailreply
2019-12-26 18:12:49 +01:00
nn = 0
sel_dump = os.path.join(config.selection['path'], config.selection['sel_dump'])
xml_dump = os.path.join(config.export['path'], config.export['xml'])
2019-12-31 09:53:46 +01:00
'''
utils
'''
2019-12-26 18:12:49 +01:00
def export_generate_path(tag):
    """Return a timestamped export path for *tag* under config.export['path'].

    Bug fixed: the original computed ``now`` but returned the literal
    placeholder ``"[now]"`` in the filename without ever substituting it.
    The timestamp format matches the one used by export_selection_tag.
    """
    now = datetime.datetime.now()
    stamp = now.strftime("%d-%m-%y_%H:%M:%S")
    return os.path.join(config.export['path'], tag + "_" + stamp + ".xml")
2020-01-02 22:49:07 +01:00
def sort_sel_dump(tag, sel_dump=sel_dump, sel_out=None):
    """Sort the selected messages of *tag* chronologically and write them back.

    Bug fixed: the ``sel_out`` parameter was accepted but ignored -- the
    sorted dump was always written over ``sel_dump``. It is now honoured
    (``None`` keeps the original overwrite-in-place behaviour).
    """
    with open(sel_dump) as fin:
        d = json.load(fin)
    # sort by parsed message date (epoch seconds)
    d[tag]['lists'] = sorted(d[tag]['lists'],
                             key=lambda m: export.utils.parse_date_msg(m))
    if sel_out is None:
        sel_out = sel_dump
    with open(sel_out, 'w') as fout:
        json.dump(d, fout, indent=4, ensure_ascii=False)
2020-01-12 12:16:10 +01:00
def recursive_sort_by_date(msg):
    """Chronologically sort every 'follow-up' list in the thread rooted at *msg*."""
    if 'follow-up' not in msg:
        return
    msg['follow-up'] = sorted(msg['follow-up'],
                              key=lambda child: export.utils.parse_date_msg(child))
    for child in msg['follow-up']:
        recursive_sort_by_date(child)
def recursive_get_follow_up(msg):
    """Flatten the 'follow-up' tree under *msg* into a single list.

    For each direct reply, its own descendants are collected first; the
    direct replies themselves come last (original traversal order kept).
    *msg* itself is not included in the result.
    """
    collected = []
    children = msg.get('follow-up', [])
    for child in children:
        collected.extend(recursive_get_follow_up(child))
    collected.extend(children)
    return collected
2020-01-02 22:49:07 +01:00
def fix_missing_content(xml_in, xml_out):
    """Fill empty <content> elements of *xml_in* from the selection dump.

    Messages are matched by their <date> text against the (recursive)
    selection entries of the chapter named in <title>; the patched tree is
    written to *xml_out*.

    Bug fixed: the early exit tested ``len(content_less) < 0``, which is
    never true (len() is non-negative); it now returns when there is
    nothing to fix.
    """
    tree = et.parse(xml_in)
    root = tree.getroot()
    tag = root.find('title').text
    content_less = root.findall('.//*[content=""]/.')
    if len(content_less) == 0:
        return
    with open(sel_dump) as fin:
        d = json.load(fin)
    for m in content_less:
        date_str = m.find('date').text
        print(date_str)
        # later matches deliberately overwrite earlier ones (original behaviour)
        for msg in d[tag]["lists"]:
            ml = find_msg_by_date_recursive(msg, date_str)
            if ml is not None:
                m.find('content').text = ml['content']
    with open(xml_out, "w") as fout:
        # et.tostring returns bytes; decode before writing text
        fout.write(et.tostring(root).decode('utf-8', 'ignore'))
def find_msg_by_date_recursive(msg, date_str):
    """Depth-first search the thread rooted at *msg* for a message whose
    'date' field equals *date_str*; return it, or None if absent."""
    if msg['date'] == date_str:
        return msg
    for child in msg.get('follow-up', ()):
        found = find_msg_by_date_recursive(child, date_str)
        if found is not None:
            return found
    return None
2019-12-31 09:53:46 +01:00
'''
xml export
'''
def hash(m):
    """Return a sha256 hex digest identifying message *m* by sender,
    subject and date (used for duplicate detection during export).

    NOTE(review): shadows the builtin ``hash``; name kept for callers.
    """
    key = "{0}{1}{2}".format(m['from'], m['subject'], m['date'])
    return hashlib.sha256(key.encode("utf-8")).hexdigest()
2019-12-28 15:58:48 +01:00
def make_xml_element(el, val):
    """Return '<el>...</el>' with *val* XML-escaped (&, <, >)."""
    return "<{0}>{1}</{0}>".format(el, escape(val))
2019-12-31 09:53:46 +01:00
'''
This is pretty patched up...........................................................
'''
def to_listserv(li, msg):
    """Return the posting address of mailing list *li* for message *msg*.

    This is pretty patched up: most lists map to a fixed address; nettime_l
    changed servers over the years, so its address is picked from the
    message date. Unknown lists log a warning and yield 'n/a'.
    """
    fixed = {
        'crumb': '<new-media-curating@jiscmail.ac.uk>',  # patch
        'spectre': 'spectre@mikrolisten.de',
        'empyre': '<empyre@lists.cofa.unsw.edu.au>',
        'nettime_bold': 'nettime-bold@nettime.org',
        'oldboys': 'oldboys@lists.ccc.de',
    }
    if li in fixed:
        return fixed[li]
    if li == 'nettime_l':
        # nettime-l@desk.nl           -- until June 8 1999
        # nettime-l-temp@material.net -- until July 15 1999
        # nettime-l@bbs.thing.net     -- until July 19 2007
        # nettime-l@kein.org          -- afterwards (and when date unknown)
        dtz = export.utils.parse_date_msg(msg)
        if dtz is not None:
            d = datetime.datetime.fromtimestamp(dtz).date()
            if d < datetime.date(1999, 6, 8):
                return 'nettime-l@desk.nl'
            if d < datetime.date(1999, 7, 15):
                return 'nettime-l-temp@material.net'
            if d < datetime.date(2007, 7, 19):
                return 'nettime-l@bbs.thing.net'
        return 'nettime-l@kein.org'
    #### SYNDICATE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! aarrgghhh
    logging.warning("no listserv to...")
    return 'n/a'
2019-12-28 15:58:48 +01:00
2020-01-12 12:16:10 +01:00
def emit_mail_xml(msg, li, thread_nbr, msg_nbr):
    """Render one message dict as a <mail>...</mail> XML fragment string.

    Duplicates -- same sha256 of from+subject+date already seen during the
    current export run -- yield '' and are skipped. Relies on two module
    globals: ``nn`` (running message counter) and ``hashes`` (duplicate
    list, (re)initialised by export_single_tag before each chapter).

    Cleanup: removed scrape artifacts and the large commented-out
    reply-stripping / recursive follow-up experiments; behaviour unchanged.
    """
    global nn, hashes

    nn += 1

    h = hash(msg)  # patch: dedupe key
    if h in hashes:
        return ''
    hashes.append(h)

    print(msg['date'] + " - " + msg['subject'])

    nbr = make_xml_element("nbr", str(thread_nbr) + "." + str(msg_nbr)) + "\n"
    subject = make_xml_element("subject", msg['subject']) + "\n"
    to = make_xml_element("to", to_listserv(li, msg)) + "\n"  # patch
    from_ = make_xml_element("from", msg['author_name']) + "\n"
    date = make_xml_element("date", msg['date']) + "\n"

    # TODO: filter reply / unescape XML.
    # NOTE(review): e.reply is computed but deliberately unused -- the raw
    # content is kept (reply stripping was abandoned); confirm before removing.
    e = emailreply.EmailMessage(export.utils.format_content(msg['content']))
    e.read()
    content_stripped = msg['content']

    content = make_xml_element("content", content_stripped) + "\n"

    return "<mail>\n" + nbr + subject + from_ + to + date + content + "</mail>\n"
2019-12-26 18:12:49 +01:00
def export_single_tag(t, sel, fout):
    """Write one <chapter> -- all messages selected under tag *t* -- to *fout*.

    *sel* is the parsed selection dump; *fout* must be opened in binary mode
    (the chapter string is encoded as UTF-8). Resets the module-global
    ``hashes`` duplicate list consumed by emit_mail_xml.
    Returns True on success, False when *t* is not a key of *sel*.

    Fixes: dead initial ``chapter = "<chapter>\\n"`` assignment removed
    (it was unconditionally overwritten); membership test simplified from
    ``t not in list(sel.keys())``; the sort lambda no longer shadows the
    loop variable ``m``.
    """
    global hashes

    if t not in sel:
        logging.error("Tag: " + t + " does not exists.")
        return False

    logging.info("Exporting tag:" + t)

    ch = sel[t]

    chapter_title = make_xml_element("title", t) + "\n"
    chapter_desc = make_xml_element("desc", ch['desc']) + "\n"
    chapter_mails = "<mails>\n"

    hashes = []
    thread_nbr = 0
    for m in ch['lists']:
        # NOTE(review): the opening message uses the pre-increment
        # thread_nbr while its follow-ups use the incremented one, so a
        # thread is numbered N.0 and its replies (N+1).0, (N+1).1, ...
        # -- kept as-is; confirm this numbering is intended.
        chapter_mails += emit_mail_xml(m, m['list'], thread_nbr, 0)
        thread_nbr += 1

        msg_nbr = 0
        # flatten the recursive "follow-up" tree and emit replies in date order
        if 'follow-up' in m:
            print('follow-up')
            all_follow = recursive_get_follow_up(m)
            print(str(len(all_follow)))
            all_follow = sorted(all_follow,
                                key=lambda fm: export.utils.parse_date_msg(fm))
            for f in all_follow:
                chapter_mails += emit_mail_xml(f, m['list'], thread_nbr, msg_nbr)
                msg_nbr += 1

    chapter_mails += "</mails>\n"

    chapter = "<chapter>\n" + chapter_title + chapter_desc + chapter_mails + "</chapter>"
    fout.write(chapter.encode('utf-8'))
    return True
def export_selection_all(sel_dump=sel_dump, xml_out=xml_dump):
    """Export every tag of the selection dump into a single XML file.

    Bug fixed: the output file was hard-coded to the module-level
    ``xml_dump``, silently ignoring the ``xml_out`` parameter; it now
    writes to ``xml_out`` (default unchanged, so existing callers behave
    the same). Returns True on success, False on the first failing tag.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)
    with open(xml_out, 'wb') as fout:
        for k in d.keys():
            if not export_single_tag(k, d, fout):
                logging.error("Error exporting: " + k)
                return False
    return True
def export_selection_tag(tag, sel_dump=sel_dump, xml_out=xml_dump):
    """Export a single *tag* of the selection dump to an XML file.

    A ``[now]`` placeholder in ``xml_out`` is replaced with the current
    timestamp. Bug fixed: ``str.replace`` returns a new string and the
    original discarded the result, so the placeholder was never
    substituted; the result is now rebound to ``xml_out``.
    Returns True on success, False on failure.
    """
    with open(sel_dump) as fin:
        d = json.load(fin)

    now = datetime.datetime.now()
    xml_out = xml_out.replace("[now]", now.strftime("%d-%m-%y_%H:%M:%S"))

    with open(xml_out, 'wb') as fout:
        if not export_single_tag(tag, d, fout):
            logging.error("Error exporting: " + tag)
            return False
    return True
#------------------------------------------------------------
# The following functions parse the archive files directly
#------------------------------------------------------------
def export_file(f, fout):
    """Parse one archive JSON file *f* and write its threads as XML to *fout*.

    NOTE(review): ``emit_mail_xml(t, all_mail)`` does not match the current
    ``emit_mail_xml(msg, li, thread_nbr, msg_nbr)`` signature, and the
    string it returns is never attached to the tree -- this looks like
    stale code from an earlier ElementTree-based emitter; confirm before use.
    """
    with open(f) as fp:
        d = json.load(fp)
    all_mail = et.Element('all')
    for t in d['threads']:
        emit_mail_xml(t, all_mail)
    # et.tostring returns bytes; decode before writing text
    fout.write(et.tostring(all_mail).decode('utf-8', 'ignore'))
def parse_date_file(fname):
    """Parse an archive filename like 'July_1999.json' into a datetime
    (day defaults to the 1st of the month)."""
    fmt = '%B_%Y.json'
    return datetime.datetime.strptime(fname, fmt)
2019-12-26 18:12:49 +01:00
def export_year(d, dt, fout):
    """Write a <chapter> covering year ``dt.year`` from the per-month JSON
    files found in directory *d*, as XML, to *fout*.

    Fixes: ``glob`` was used without being imported anywhere in the file
    (NameError at runtime) -- now covered by the file-level import; the
    month loop variable shadowed the directory parameter ``d`` and is
    renamed.

    NOTE(review): ``emit_mail_xml(t, mails)`` does not match the current
    ``emit_mail_xml(msg, li, thread_nbr, msg_nbr)`` signature -- stale code
    from the earlier ElementTree-based emitter; confirm before use.
    """
    dir_files = glob.glob(os.path.join(d, "*.json"))
    chapter = et.Element('chapter')
    year = et.SubElement(chapter, 'year')
    year.text = dt.strftime('%Y')
    # collect (month-date, path) pairs for the requested year, then
    # sort by month before writing to XML
    dates = []
    for f in dir_files:
        fdt = parse_date_file(os.path.basename(f))
        if dt.year != fdt.year:
            continue
        dates.append((fdt, f))
    dates.sort(key=lambda tup: tup[0])
    for month_dt, f in dates:
        logging.debug(f)
        section = et.SubElement(chapter, 'section')
        month = et.SubElement(section, 'month')
        month.text = month_dt.strftime('%B')
        with open(f) as fp:
            dj = json.load(fp)
        mails = et.SubElement(section, 'mails')
        for t in dj['threads']:
            emit_mail_xml(t, mails)
    # write utf8 to file (et.tostring returns bytes); strip characters
    # that are invalid in XML before writing
    fout.write(export.utils.remove_invalid_xml_characters(et.tostring(chapter).decode('utf-8', 'ignore')))