haha! commit
This commit is contained in:
parent
63163c0690
commit
f68999db50
3
.gitattributes
vendored
Normal file
3
.gitattributes
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
archives.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.xml filter=lfs diff=lfs merge=lfs -text
|
||||
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@ -1,3 +1,10 @@
|
||||
# project based
|
||||
|
||||
.DS_Store
|
||||
archives/
|
||||
|
||||
|
||||
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
BIN
archives.zip
(Stored with Git LFS)
Normal file
BIN
archives.zip
(Stored with Git LFS)
Normal file
Binary file not shown.
Binary file not shown.
BIN
book/docs/Thread text among frames in Adobe InDesign.webloc
Normal file
BIN
book/docs/Thread text among frames in Adobe InDesign.webloc
Normal file
Binary file not shown.
BIN
book/docs/indesign_and_xml_technical_reference.pdf
(Stored with Git LFS)
Normal file
BIN
book/docs/indesign_and_xml_technical_reference.pdf
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
book/template-essay-chapter-section.indt
Normal file
BIN
book/template-essay-chapter-section.indt
Normal file
Binary file not shown.
BIN
book/template-essay.indt
Normal file
BIN
book/template-essay.indt
Normal file
Binary file not shown.
116
export_xml.py
Normal file
116
export_xml.py
Normal file
@ -0,0 +1,116 @@
|
||||
import argparse, os, glob, sys, json
|
||||
import xml.etree.ElementTree as et
|
||||
from datetime import datetime
|
||||
|
||||
def format_subject(s):
    """Return *s* with every run of whitespace collapsed to one space.

    Leading and trailing whitespace disappears as well, because
    ``str.split()`` without arguments discards empty edge fields.
    """
    words = s.split()
    return ' '.join(words)
|
||||
|
||||
def format_content(c):
    """Strip surrounding whitespace from *c* and shrink triple newlines.

    Each non-overlapping occurrence of three consecutive newlines
    (scanned left to right) becomes a single newline.
    """
    trimmed = c.strip()
    # Splitting on the triple newline and re-joining with one newline is
    # the same left-to-right, non-overlapping substitution as replace().
    return "\n".join(trimmed.split("\n\n\n"))
|
||||
|
||||
def parse_date(fname):
    """Turn an archive file name like ``January_1996.json`` into a datetime.

    ``datetime.strptime`` raises ValueError when *fname* does not match
    the ``<full month name>_<4-digit year>.json`` pattern.
    """
    file_name_format = '%B_%Y.json'
    return datetime.strptime(fname, file_name_format)
|
||||
|
||||
def emit_mail_xml(msg, xmlel):
    """Append a ``<mail>`` element describing *msg* to *xmlel*.

    The new element receives ``subject``, ``to``, ``from``, ``date`` and
    ``content`` children, in that order. ``subject`` and ``content`` go
    through ``format_subject`` / ``format_content``; the other fields are
    copied verbatim.

    Args:
        msg: mapping with the keys 'subject', 'to', 'from', 'date' and
            'content'; a 'follow-up' key is optional.
        xmlel: parent ElementTree element that receives the ``<mail>``.
    """
    mail = et.SubElement(xmlel, 'mail')

    et.SubElement(mail, 'subject').text = format_subject(msg['subject'])

    # These header fields are written exactly as stored in the archive.
    for field in ('to', 'from', 'date'):
        et.SubElement(mail, field).text = msg[field]

    # TODO: unescape chars ...
    et.SubElement(mail, 'content').text = format_content(msg['content'])

    # .get() so that a message without a 'follow-up' key is treated like
    # one with an empty value instead of raising KeyError.
    if msg.get('follow-up'):
        print('follow-up')
        # TODO: emit the follow-up messages recursively. The structure of
        # the 'follow-up' value is not visible here -- confirm it before
        # implementing.
|
||||
|
||||
|
||||
def export_file(f, fout):
    """Export a single archive file as one ``<all>`` XML element.

    Args:
        f: path of a JSON file whose top-level 'threads' list holds the
            messages to export.
        fout: writable file object; it receives the result of
            ``et.tostring`` (bytes), so it must be opened in binary mode.
    """
    # Read the JSON as UTF-8 explicitly instead of depending on the
    # locale's default encoding.
    with open(f, encoding="utf-8") as fp:
        d = json.load(fp)

    all_mail = et.Element('all')
    for t in d['threads']:
        emit_mail_xml(t, all_mail)

    fout.write(et.tostring(all_mail))
|
||||
|
||||
def export_year(d, dt, fout):
    """Export one year of an archive directory as a ``<chapter>`` element.

    The chapter starts with a ``<year>`` child, followed by one
    ``<section>`` per monthly file in chronological order. Each section
    holds a ``<month>`` name and a ``<mails>`` list.

    Args:
        d: directory containing ``<Month>_<Year>.json`` files.
        dt: datetime whose ``year`` selects which files are exported.
        fout: writable file object; it receives the result of
            ``et.tostring`` (bytes), so it must be opened in binary mode.
    """
    chapter = et.Element('chapter')
    year = et.SubElement(chapter, 'year')
    year.text = dt.strftime('%Y')

    # Collect the files of the requested year first so the months can be
    # sorted before anything is written to the XML tree.
    months = []
    for path in glob.glob(os.path.join(d, "*.json")):
        try:
            fdt = parse_date(os.path.basename(path))
        except ValueError:
            # A stray *.json file must not abort the whole export.
            print(path + ' does not look like <Month>_<Year>.json. Skipping.',
                  file=sys.stderr)
            continue
        if fdt.year == dt.year:
            months.append((fdt, path))

    months.sort(key=lambda tup: tup[0])

    for fdt, path in months:

        print(fdt)
        print(path)

        section = et.SubElement(chapter, 'section')
        month = et.SubElement(section, 'month')
        month.text = fdt.strftime('%B')

        # Read the JSON as UTF-8 explicitly instead of depending on the
        # locale's default encoding.
        with open(path, encoding="utf-8") as fp:
            dj = json.load(fp)

        mails = et.SubElement(section, 'mails')
        for t in dj['threads']:
            emit_mail_xml(t, mails)

    fout.write(et.tostring(chapter))
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point.
    #   with --year: every positional argument is an archive directory and
    #                is exported to "<dirname>_<year>.xml";
    #   without:     every positional argument is a single JSON file and
    #                is exported to "<filename without extension>.xml".
    # Output files are created in the current working directory.

    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
    p.add_argument('file', metavar="f", help="mailinglist file(s) or dir(s) to export", nargs="+")
    p.add_argument('--year', '-y', metavar='y', type=str, help='year of archive')

    args = p.parse_args()
    # nargs="+" already makes argparse reject an empty file list, so no
    # manual "no files" check is needed here.

    dt = None
    if args.year:
        try:
            dt = datetime.strptime(args.year, '%Y')
        except ValueError:
            sys.exit(args.year + ' is not a valid year. Aborting.')

    for f in args.file:
        # normpath drops a trailing separator, so "archives/list/" still
        # yields "list" rather than an empty basename.
        stem = os.path.basename(os.path.normpath(f))

        if dt is not None:
            if not os.path.isdir(f):
                sys.exit(f + ' is not a valid directory. Aborting.')
            foutname = stem + "_" + dt.strftime('%Y') + ".xml"
            with open(foutname, "wb") as fout:
                export_year(f, dt, fout)
        else:
            if not os.path.isfile(f):
                sys.exit(f + ' is not a valid file. Aborting.')
            # The single-file branch previously used an undefined `fout`;
            # open one output per input, in binary mode because
            # et.tostring() produces bytes.
            foutname = os.path.splitext(stem)[0] + ".xml"
            with open(foutname, "wb") as fout:
                export_file(f, fout)
|
||||
|
||||
|
||||
15
rename_nettime.py
Normal file
15
rename_nettime.py
Normal file
@ -0,0 +1,15 @@
|
||||
import argparse, os, sys, glob, json
|
||||
from datetime import datetime
|
||||
|
||||
def new_name(n):
    """Map a ``nettime-l_<Mon>_<yy>.json`` name to ``<Month>_<yyyy>.json``.

    For example ``nettime-l_Jan_96.json`` becomes ``January_1996.json``.
    ``datetime.strptime`` raises ValueError when *n* does not match the
    expected pattern.
    """
    parsed = datetime.strptime(n, 'nettime-l_%b_%y.json')
    return '{}.json'.format(parsed.strftime('%B_%Y'))
|
||||
|
||||
if __name__ == "__main__":
    # Rename every scraped nettime-l archive file to <Month>_<Year>.json.

    for f in glob.glob("archives/nettime-l/*.json"):
        try:
            renamed = new_name(os.path.basename(f))
        except ValueError:
            # Not in the nettime-l_<Mon>_<yy>.json form, e.g. already
            # renamed by an earlier run. Skip it so the script can be
            # re-run safely.
            continue
        # Keep the file in its own directory rather than repeating the
        # directory literal.
        os.rename(f, os.path.join(os.path.dirname(f), renamed))
|
||||
|
||||
56
stats.py
Normal file
56
stats.py
Normal file
@ -0,0 +1,56 @@
|
||||
import argparse, os, sys, glob, json
|
||||
|
||||
ARCH = "archives/"


def run(l):
    """Print total character, word and line counts for one archive.

    Args:
        l: archive directory. When it does not already start with
            ``ARCH`` it is joined onto ``ARCH``.

    Every ``*.json`` file directly inside the directory is loaded, and the
    'content' string of each entry in its 'threads' list is counted. The
    process exits through ``sys.exit`` with a message when the resolved
    path is not a directory.
    """
    if not l.startswith(ARCH):
        l = os.path.join(ARCH, l)

    if not os.path.isdir(l):
        sys.exit(l + ' is not a valid archive. Aborting.')

    total_chars = 0
    total_words = 0
    total_lines = 0
    for f in glob.glob(os.path.join(l, "*.json")):
        # Read the JSON as UTF-8 explicitly instead of depending on the
        # locale's default encoding.
        with open(f, encoding="utf-8") as fp:
            d = json.load(fp)
        for t in d['threads']:
            content = t["content"]
            total_chars += len(content)
            total_words += len(content.split())
            # Counts newline-separated segments, so an empty string still
            # contributes one line.
            total_lines += len(content.split('\n'))

    print("\n\n" + l)
    print("Total chars: " + str(total_chars))
    print("Total words: " + str(total_words))
    print("Total lines: " + str(total_lines))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: print statistics for each named list.

    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
    p.add_argument('list', metavar="list", help="mailinglist(s) to analyse", nargs="+")

    args = p.parse_args()
    # nargs="+" already makes argparse reject an empty list, so no manual
    # "no lists" check is needed here.

    for name in args.list:
        run(name)

    print("\n\n . . . . ")
|
||||
Loading…
x
Reference in New Issue
Block a user