haha! commit

2019-11-27 15:45:18 +01:00 · 2019-11-27 15:45:18 +01:00 · f68999db50
commit f68999db50
parent 63163c0690
12 changed files with 205 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1,3 @@
+archives.zip filter=lfs diff=lfs merge=lfs -text
+*.xml filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,10 @@
+# project based
+
+.DS_Store
+archives/
+
+
+
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/README.md
+++ b/README.md
@ -1 +1,3 @@
 _*_*_*_
+
+Mailinglists Annalen
--- a/archives.zip
+++ b/archives.zip
--- a/book/docs/Standard
+++ b/book/docs/Standard
--- a/book/docs/Thread
+++ b/book/docs/Thread
--- a/book/docs/indesign_and_xml_technical_reference.pdf
+++ b/book/docs/indesign_and_xml_technical_reference.pdf
--- a/book/template-essay-chapter-section.indt
+++ b/book/template-essay-chapter-section.indt
--- a/book/template-essay.indt
+++ b/book/template-essay.indt
--- a/export_xml.py
+++ b/export_xml.py
@ -0,0 +1,116 @@
+import argparse, os, glob, sys, json
+import xml.etree.ElementTree as et
+from datetime import datetime
+
+def format_subject(s):
+	"""Return `s` with every run of whitespace collapsed to a single space.
+
+	Leading and trailing whitespace is dropped as well, because the
+	string is split on whitespace and re-joined.
+	"""
+	words = s.split()
+	return ' '.join(words)
+
+def format_content(c):
+	"""Return `c` stripped, with each triple newline replaced by one newline."""
+	trimmed = c.strip()
+	# Only exact runs of three newlines are replaced (left to right, non-overlapping).
+	return trimmed.replace("\n\n\n", "\n")
+
+def parse_date(fname):
+	"""Parse a file name of the form "<Month>_<Year>.json" into a datetime.
+
+	The month is the full month name (%B) and the year has four digits (%Y).
+	Raises ValueError (from strptime) when `fname` does not match.
+	"""
+	name_pattern = '%B_%Y.json'
+	return datetime.strptime(fname, name_pattern)
+
+def emit_mail_xml(msg, xmlel):
+	"""Append a <mail> element describing `msg` to `xmlel`.
+
+	The mail gets one child element per field, in this order: subject,
+	to, from, date, content. Subject and content go through
+	format_subject / format_content; the other values are copied as-is.
+	`xmlel` is modified in place and nothing is returned.
+	"""
+	# (element tag == msg key, formatter or None to copy the value unchanged)
+	fields = (
+		('subject', format_subject),
+		('to', None),
+		('from', None),
+		('date', None),
+		('content', format_content),  ## unescape chars ...
+	)
+
+	mail = et.SubElement(xmlel, 'mail')
+	for tag, formatter in fields:
+		child = et.SubElement(mail, tag)
+		raw = msg[tag]
+		child.text = raw if formatter is None else formatter(raw)
+
+	# Follow-ups are only reported on stdout for now; they are not exported.
+	if msg['follow-up']:
+		print('follow-up')
+
+	# **** RECURSIVE "follow-up" ****
+
+
+def export_file(f, fout):
+	"""Export all threads of the JSON file `f` as one XML document to `fout`.
+
+	Every entry of the file's 'threads' list becomes a <mail> element
+	(via emit_mail_xml) under a single <all> root. The serialized root
+	is written with one fout.write() call; et.tostring() returns bytes
+	by default, so `fout` has to accept bytes.
+	"""
+	with open(f) as source:
+		threads = json.load(source)['threads']
+
+	root = et.Element('all')
+	for thread in threads:
+		emit_mail_xml(thread, root)
+
+	fout.write(et.tostring(root))
+
+def export_year(d, dt, fout):
+	"""Write one <chapter> for the year of `dt` from the JSON files in `d`.
+
+	Every "*.json" file directly inside directory `d` has its name parsed
+	by parse_date; only files whose year equals dt.year are kept, ordered
+	by date. The chapter starts with a <year> element, followed by one
+	<section> per kept file holding a <month> (full month name) and a
+	<mails> element filled by emit_mail_xml. The serialized chapter is
+	written to `fout` in a single write.
+	"""
+	# Pair every JSON file of the directory with the date encoded in its name.
+	dated = [
+		(parse_date(os.path.basename(path)), path)
+		for path in glob.glob(os.path.join(d, "*.json"))
+	]
+
+	# SORT MONTHS BEFORE WRITING TO XML (only the requested year is kept)
+	in_year = sorted(
+		(pair for pair in dated if pair[0].year == dt.year),
+		key=lambda pair: pair[0],
+	)
+
+	chapter = et.Element('chapter')
+	et.SubElement(chapter, 'year').text = dt.strftime('%Y')
+
+	for month_dt, path in in_year:
+
+		print(month_dt)
+		print(path)
+
+		section = et.SubElement(chapter, 'section')
+		et.SubElement(section, 'month').text = month_dt.strftime('%B')
+
+		with open(path) as source:
+			threads = json.load(source)['threads']
+
+		mails = et.SubElement(section, 'mails')
+		for thread in threads:
+			emit_mail_xml(thread, mails)
+
+	fout.write(et.tostring(chapter))
+
+if __name__ == "__main__":
+
+	# Command line entry point.
+	#   with --year: every positional argument is a directory; the files of
+	#                that year are exported to "<dirname>_<year>.xml".
+	#   without:     every positional argument is a single JSON file that is
+	#                exported to "<filename without extension>.xml".
+	# Output files are created in the current working directory.
+	p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
+	p.add_argument('file', metavar="f", help="mailinglist file(s) or dir(s) to export", nargs="+")
+	p.add_argument('--year', '-y', metavar='y', type=str, help='year of archive')
+
+	args = p.parse_args()
+
+	if args.year:
+		dt = datetime.strptime(args.year, '%Y')
+
+	if not args.file:
+		sys.exit('No file(s). Aborting.')
+
+	for f in args.file:
+		# normpath drops a trailing separator, so "archives/list/" still
+		# yields "list" instead of an empty base name.
+		base = os.path.basename(os.path.normpath(f))
+		if args.year:
+			if not os.path.isdir(f):
+				sys.exit(f + ' is not a valid directory. Aborting.')
+			foutname = base + "_" + dt.strftime('%Y') + ".xml"
+			with open(foutname, "wb") as fout:
+				export_year(f, dt, fout)
+		else:
+			if not os.path.isfile(f):
+				sys.exit(f + ' is not a valid file. Aborting.')
+			# One output per input file: the previous code passed an
+			# undefined `fout` here (NameError). Binary mode because
+			# et.tostring() returns bytes.
+			foutname = os.path.splitext(base)[0] + ".xml"
+			with open(foutname, "wb") as fout:
+				export_file(f, fout)
+
+
--- a/rename_nettime.py
+++ b/rename_nettime.py
@ -0,0 +1,15 @@
+import argparse, os, sys, glob, json
+from datetime import datetime
+
+def new_name(n):
+	"""Translate "nettime-l_<Mon>_<yy>.json" into "<Month>_<yyyy>.json".
+
+	`n` is parsed with an abbreviated month name (%b) and a two-digit
+	year (%y); the result uses the full month name and four-digit year.
+	Raises ValueError (from strptime) when `n` does not match.
+	"""
+	parsed = datetime.strptime(n, 'nettime-l_%b_%y.json')
+	return '{}.json'.format(parsed.strftime('%B_%Y'))
+
+if __name__ == "__main__":
+
+	# Rename every JSON file in archives/nettime-l/ in place, using the
+	# name computed by new_name() from the file's current base name.
+	for old_path in glob.glob("archives/nettime-l/*.json"):
+		renamed = 'archives/nettime-l/' + new_name(os.path.basename(old_path))
+		os.rename(old_path, renamed)
+
--- a/stats.py
+++ b/stats.py
@ -0,0 +1,56 @@
+import argparse, os, sys, glob, json
+
+ARCH = "archives/"
+
+
+def run(l):
+	"""Print total character, word and line counts for the archive `l`.
+
+	`l` is prefixed with ARCH unless it already starts with it. The
+	process exits with a message when the resulting path is not a
+	directory. Counts are summed over the "content" of every entry in
+	the 'threads' list of every "*.json" file directly inside it; words
+	are whitespace-separated pieces and lines are pieces between "\n".
+	"""
+	archive = l if l.startswith(ARCH) else os.path.join(ARCH, l)
+
+	if not os.path.isdir(archive):
+		sys.exit(archive + ' is not a valid archive. Aborting.')
+
+	total_chars = total_words = total_lines = 0
+	for path in glob.glob(os.path.join(archive, "*.json")):
+		with open(path) as source:
+			threads = json.load(source)['threads']
+
+		# Per-file sums, added to the running totals below.
+		bodies = [thread["content"] for thread in threads]
+		total_chars += sum(len(body) for body in bodies)
+		total_words += sum(len(body.split()) for body in bodies)
+		total_lines += sum(len(body.split('\n')) for body in bodies)
+
+	print("\n\n" + archive)
+	print("Total chars: " + str(total_chars))
+	print("Total words: " + str(total_words))
+	print("Total lines: " + str(total_lines))
+
+
+if __name__ == "__main__":
+
+	# Command line entry point: print the statistics of every mailing list
+	# named on the command line, one after the other.
+	parser = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
+	parser.add_argument('list', metavar="list", help="mailinglist to ana", nargs="+")
+
+	options = parser.parse_args()
+
+	if not options.list:
+		sys.exit('No list(s). Aborting.')
+
+	for list_name in options.list:
+		run(list_name)
+
+	print("\n\n . . . . ")