diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..e432741
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,3 @@
+archives.zip filter=lfs diff=lfs merge=lfs -text
+*.xml filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
index 6a18ad4..caf400b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,10 @@
+# project based
+
+.DS_Store
+archives/
+
+
+
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
index f21ad6d..cc9da7b 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,3 @@
 _*_*_*_
+
+Mailinglists Annalen
diff --git a/archives.zip b/archives.zip
new file mode 100644
index 0000000..e946207
--- /dev/null
+++ b/archives.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3c1fbec02224f3fec068f42a9e80e42476052cbabe52e7759ac4f7521608d6
+size 125691971
diff --git a/book/docs/Standard Book Sizes, Dimension, and Specifications - Chart | Blurb.webloc b/book/docs/Standard Book Sizes, Dimension, and Specifications - Chart | Blurb.webloc
new file mode 100644
index 0000000..c1ccfde
Binary files /dev/null and b/book/docs/Standard Book Sizes, Dimension, and Specifications - Chart | Blurb.webloc differ
diff --git a/book/docs/Thread text among frames in Adobe InDesign.webloc b/book/docs/Thread text among frames in Adobe InDesign.webloc
new file mode 100644
index 0000000..91fbf6e
Binary files /dev/null and b/book/docs/Thread text among frames in Adobe InDesign.webloc differ
diff --git a/book/docs/indesign_and_xml_technical_reference.pdf b/book/docs/indesign_and_xml_technical_reference.pdf
new file mode 100644
index 0000000..b8fca8c
--- /dev/null
+++ b/book/docs/indesign_and_xml_technical_reference.pdf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e81aa36fa6f57b8bbb4953ab4cfe708bd7068616ad57fd704666c90c7f40de95
+size 2982653
diff --git a/book/template-essay-chapter-section.indt b/book/template-essay-chapter-section.indt
new file mode 100644
index 0000000..48f30f9
Binary files /dev/null and b/book/template-essay-chapter-section.indt differ
diff --git a/book/template-essay.indt b/book/template-essay.indt
new file mode 100644
index 0000000..c7ff110
Binary files /dev/null and b/book/template-essay.indt differ
diff --git a/export_xml.py b/export_xml.py
new file mode 100644
--- /dev/null
+++ b/export_xml.py
@@ -0,0 +1,138 @@
+import argparse, os, glob, sys, json
+import xml.etree.ElementTree as et
+from datetime import datetime
+
+
+def format_subject(s):
+    """Collapse every run of whitespace in subject `s` to one space."""
+    return ' '.join(s.split())
+
+
+def format_content(c):
+    """Strip `c`, then replace every triple newline with a single one."""
+    return c.strip().replace("\n\n\n", "\n")
+
+
+def parse_date(fname):
+    """Parse an archive file name like 'January_1999.json' into a datetime.
+
+    Raises ValueError when `fname` does not match '<Month>_<Year>.json'.
+    """
+    return datetime.strptime(fname, '%B_%Y.json')
+
+
+def load_json(path):
+    """Return the decoded JSON document stored in the file at `path`."""
+    # Binary mode: the json module then detects UTF-8/16/32 on its own
+    # instead of relying on the locale's default text encoding.
+    with open(path, 'rb') as fp:
+        return json.load(fp)
+
+
+def emit_mail_xml(msg, xmlel):
+    """Append a <mail> element built from the message dict `msg` to `xmlel`.
+
+    `msg` must have the keys 'subject', 'to', 'from', 'date' and
+    'content'; the 'follow-up' key is optional.
+    """
+    mail = et.SubElement(xmlel, 'mail')
+
+    et.SubElement(mail, 'subject').text = format_subject(msg['subject'])
+    # The remaining header fields are copied verbatim, in this order.
+    for field in ('to', 'from', 'date'):
+        et.SubElement(mail, field).text = msg[field]
+
+    # TODO: unescape chars ...
+    et.SubElement(mail, 'content').text = format_content(msg['content'])
+
+    # .get(): a message without the key simply has no follow-ups; indexing
+    # would raise KeyError.
+    if msg.get('follow-up'):
+        print('follow-up')
+
+    # TODO: emit the "follow-up" messages recursively.
+
+
+def export_file(f, fout):
+    """Write all threads of the JSON archive `f` to `fout` as one <all>.
+
+    `fout` must be opened in binary mode, since et.tostring() returns bytes.
+    """
+    all_mail = et.Element('all')
+    for t in load_json(f)['threads']:
+        emit_mail_xml(t, all_mail)
+
+    fout.write(et.tostring(all_mail))
+
+
+def export_year(d, dt, fout):
+    """Write the archives in directory `d` dated in `dt`'s year to `fout`.
+
+    The output is one <chapter> with a <year> and, per month in calendar
+    order, a <section>. `fout` must be opened in binary mode.
+    """
+    chapter = et.Element('chapter')
+    et.SubElement(chapter, 'year').text = dt.strftime('%Y')
+
+    # Collect (date, path) pairs first so the months can be sorted.
+    dated_files = []
+    for path in glob.glob(os.path.join(d, "*.json")):
+        try:
+            fdt = parse_date(os.path.basename(path))
+        except ValueError:
+            # Not a '<Month>_<Year>.json' archive: skip it, don't abort.
+            sys.stderr.write('skipping ' + path + '\n')
+            continue
+        if fdt.year == dt.year:
+            dated_files.append((fdt, path))
+
+    # Compare on the date only.
+    dated_files.sort(key=lambda pair: pair[0])
+
+    for fdt, path in dated_files:
+        print(fdt)
+        print(path)
+
+        section = et.SubElement(chapter, 'section')
+        et.SubElement(section, 'month').text = fdt.strftime('%B')
+
+        mails = et.SubElement(section, 'mails')
+        for t in load_json(path)['threads']:
+            emit_mail_xml(t, mails)
+
+    fout.write(et.tostring(chapter))
+
+
+if __name__ == "__main__":
+
+    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
+    p.add_argument('file', metavar="f", help="mailinglist file(s) or dir(s) to export", nargs="+")
+    p.add_argument('--year', '-y', metavar='y', type=str, help='year of archive')
+
+    args = p.parse_args()
+
+    dt = None
+    if args.year:
+        try:
+            dt = datetime.strptime(args.year, '%Y')
+        except ValueError:
+            sys.exit(args.year + ' is not a valid year. Aborting.')
+
+    # nargs="+" makes argparse reject an empty file list, so no check here.
+    for f in args.file:
+        if dt is not None:
+            if not os.path.isdir(f):
+                sys.exit(f + ' is not a valid directory. Aborting.')
+            # normpath drops a trailing slash, which would otherwise make
+            # basename() return an empty string.
+            name = os.path.basename(os.path.normpath(f))
+            foutname = name + "_" + dt.strftime('%Y') + ".xml"
+            with open(foutname, "wb") as fout:
+                export_year(f, dt, fout)
+        else:
+            if not os.path.isfile(f):
+                sys.exit(f + ' is not a valid file. Aborting.')
+            # One output per input: the input's name with an .xml extension.
+            name = os.path.splitext(os.path.basename(f))[0]
+            with open(name + ".xml", "wb") as fout:
+                export_file(f, fout)
diff --git a/rename_nettime.py b/rename_nettime.py
new file mode 100644
--- /dev/null
+++ b/rename_nettime.py
@@ -0,0 +1,24 @@
+import argparse, os, sys, glob, json
+from datetime import datetime
+
+
+def new_name(n):
+    """Turn a name like 'nettime-l_Jan_99.json' into 'January_1999.json'.
+
+    Raises ValueError when `n` does not match 'nettime-l_<Mon>_<yy>.json'.
+    """
+    # %y is a two-digit year: 69-99 map to 1969-1999, 00-68 to 2000-2068.
+    dt = datetime.strptime(n, 'nettime-l_%b_%y.json')
+    return dt.strftime('%B_%Y') + '.json'
+
+
+if __name__ == "__main__":
+
+    for f in glob.glob("archives/nettime-l/*.json"):
+        try:
+            newname = os.path.join(os.path.dirname(f), new_name(os.path.basename(f)))
+        except ValueError:
+            # Already renamed (or unrelated) file: leave it alone so the
+            # script can safely be run more than once.
+            continue
+        os.rename(f, newname)
diff --git a/stats.py b/stats.py
new file mode 100644
--- /dev/null
+++ b/stats.py
@@ -0,0 +1,51 @@
+import argparse, os, sys, glob, json
+
+ARCH = "archives/"
+
+
+def run(l):
+    """Print the total character, word and line counts of mailing list `l`.
+
+    `l` names a directory of JSON archives; ARCH is prepended unless `l`
+    already starts with it. Exits with a message if it is no directory.
+    """
+    if not l.startswith(ARCH):
+        l = os.path.join(ARCH, l)
+
+    if not os.path.isdir(l):
+        sys.exit(l + ' is not a valid archive. Aborting.')
+
+    total_chars = 0
+    total_words = 0
+    total_lines = 0
+    for f in glob.glob(os.path.join(l, "*.json")):
+        # Binary mode: the json module then detects UTF-8/16/32 on its own
+        # instead of relying on the locale's default text encoding.
+        with open(f, 'rb') as fp:
+            d = json.load(fp)
+        # Only each thread's own 'content' is counted here.
+        for t in d['threads']:
+            content = t["content"]
+            total_chars += len(content)
+            total_words += len(content.split())
+            # split('\n') counts an empty message as one line.
+            total_lines += len(content.split('\n'))
+
+    print("\n\n" + l)
+    print("Total chars: " + str(total_chars))
+    print("Total words: " + str(total_words))
+    print("Total lines: " + str(total_lines))
+
+
+if __name__ == "__main__":
+
+    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
+    p.add_argument('list', metavar="list", help="mailinglist(s) to analyse", nargs="+")
+
+    args = p.parse_args()
+
+    # nargs="+" makes argparse reject an empty list, so no check is needed.
+    for l in args.list:
+        run(l)
+
+    print("\n\n . . . . ")