colophon
This commit is contained in:
parent
dd0d9469ea
commit
18ed080652
706
book/colophon.txt
Normal file
706
book/colophon.txt
Normal file
File diff suppressed because one or more lines are too long
5
list_all_files.py
Normal file
5
list_all_files.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Print the path of every Python source file found beneath the current
# working directory. rglob() yields matches lazily, so it is iterated
# directly instead of being copied into a throwaway list first.
for f in Path('.').rglob('*.py'):
    print(f)
|
||||||
102
stats.py
102
stats.py
@ -1,40 +1,100 @@
|
|||||||
import argparse, os, sys, glob, json
|
import argparse, os, sys, glob, re, json, hashlib, logging
|
||||||
|
from datetime import datetime
|
||||||
|
import config
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
ARCH = "archives/"
|
ARCH = "archives/"
|
||||||
|
|
||||||
|
hashes = []
|
||||||
|
|
||||||
|
def hash(m):
    """Return a hex SHA-256 digest identifying message *m*.

    NOTE: this intentionally keeps the name ``hash`` (shadowing the builtin
    inside this module) because other code in the file calls it by that name.

    Args:
        m: Mapping whose ``'from'``, ``'subject'`` and ``'date'`` entries are
            strings.

    Returns:
        The hexadecimal SHA-256 digest of the three fields concatenated in
        that order and encoded as UTF-8.

    Raises:
        KeyError: If one of the three fields is missing from a dict.
        TypeError: If a field is not a string.
    """
    # The fields are joined without a separator; the digest value is kept
    # exactly as before so previously computed digests remain comparable.
    identity = m['from'] + m['subject'] + m['date']
    return hashlib.sha256(identity.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
def report_msg(msg):
    """Count characters, words and lines of a message and its follow-ups.

    The message is identified by the digest returned by this module's
    ``hash(msg)``. Every digest seen so far is remembered in the module-level
    ``hashes`` list, so a message is only counted the first time it is
    reported.

    Args:
        msg: Mapping with a ``"content"`` string and, optionally, a
            ``'follow-up'`` iterable of nested messages handled the same way.

    Returns:
        ``None`` if the message was already reported. Otherwise a tuple
        ``(chars, words, lines)`` covering this message plus every follow-up
        that was not itself a duplicate.
    """
    h = hash(msg)
    if h in hashes:
        # Duplicate of an already counted message: skip it silently.
        return None
    # ``hashes`` is only mutated, never rebound, so no ``global`` is needed.
    hashes.append(h)

    content = msg["content"]
    chars = len(content)
    # A word is a maximal run of ``\w`` characters.
    words = len(re.findall(r'\w+', content))
    # Number of newline-separated segments; an empty string counts as 1.
    lines = len(content.split('\n'))

    if 'follow-up' in msg:
        for f in msg['follow-up']:
            x = report_msg(f)
            # Duplicated follow-ups return None and contribute nothing.
            if x is not None:
                chars += x[0]
                words += x[1]
                lines += x[2]

    return (chars, words, lines)
|
||||||
|
|
||||||
|
def year_filename(fn):
    """Return the year encoded in an archive file name.

    Args:
        fn: Bare file name of the form ``<MonthName>_<Year>.json``, for
            example ``"January_2015.json"``. The month name is parsed with
            ``%B``, i.e. the full month name of the current locale.

    Returns:
        The four-digit year as an ``int``.

    Raises:
        ValueError: If *fn* does not match the ``%B_%Y.json`` pattern.
    """
    return datetime.strptime(fn, "%B_%Y.json").year
|
||||||
|
|
||||||
|
|
||||||
def run(l):
|
def run(l):
|
||||||
|
|
||||||
if not l.startswith(ARCH):
|
global hashes
|
||||||
l = os.path.join(ARCH, l)
|
|
||||||
|
if not l.startswith(config.archives):
|
||||||
|
l = os.path.join(config.archives, l)
|
||||||
|
|
||||||
if not os.path.isdir(l):
|
if not os.path.isdir(l):
|
||||||
sys.exit(l + ' is not a valid archive. Aborting.')
|
sys.exit(l + ' is not a valid archive. Aborting.')
|
||||||
|
|
||||||
files = [f for f in glob.glob(os.path.join(l, "*.json"))]
|
files = [f for f in glob.glob(os.path.join(l, "*.json"))]
|
||||||
|
|
||||||
|
report = {}
|
||||||
|
|
||||||
|
for f in files:
|
||||||
|
# print(os.path.basename(f))
|
||||||
|
|
||||||
|
with open(f) as fp:
|
||||||
|
d = json.load(fp)
|
||||||
|
|
||||||
|
for t in d['threads']:
|
||||||
|
x = report_msg(t)
|
||||||
|
|
||||||
|
year = year_filename(os.path.basename(f))
|
||||||
|
# print(year)
|
||||||
|
if year not in list(report.keys()):
|
||||||
|
report[year] = {'chars': 0, 'words': 0, 'lines': 0}
|
||||||
|
|
||||||
|
if x is not None:
|
||||||
|
report[year]['chars'] += x[0]
|
||||||
|
report[year]['words'] += x[1]
|
||||||
|
report[year]['lines'] += x[2]
|
||||||
|
|
||||||
total_chars = 0
|
total_chars = 0
|
||||||
total_words = 0
|
total_words = 0
|
||||||
total_lines = 0
|
total_lines = 0
|
||||||
for f in files:
|
|
||||||
with open(f) as fp:
|
|
||||||
d = json.load(fp)
|
|
||||||
# print(d['name'])
|
|
||||||
chars = 0
|
|
||||||
words = 0
|
|
||||||
lines = 0
|
|
||||||
for t in d['threads']:
|
|
||||||
chars += len(t["content"])
|
|
||||||
words += len(t["content"].split())
|
|
||||||
lines += len(t["content"].split('\n'))
|
|
||||||
# print(" chars: " + str(chars))
|
|
||||||
# print(" words: " + str(words))
|
|
||||||
# print(" lines: " + str(lines))
|
|
||||||
total_chars += chars
|
|
||||||
total_words += words
|
|
||||||
total_lines += lines
|
|
||||||
|
|
||||||
print("\n\n" + l)
|
print("\n\n" + l)
|
||||||
|
print("Number of written characters per year:")
|
||||||
|
sorted_report = collections.OrderedDict(sorted(report.items()))
|
||||||
|
for k, v in sorted_report.items():
|
||||||
|
print(" " + str(k) + ": " + str(v['chars']))
|
||||||
|
total_chars += v['chars']
|
||||||
|
print("Number of written words per year:")
|
||||||
|
for k, v in sorted_report.items():
|
||||||
|
print(" " + str(k) + ": " + str(v['words']))
|
||||||
|
total_words += v['words']
|
||||||
|
print("Number of written lines per year:")
|
||||||
|
for k, v in sorted_report.items():
|
||||||
|
print(" " + str(k) + ": " + str(v['lines']))
|
||||||
|
total_lines += v['lines']
|
||||||
|
|
||||||
|
|
||||||
print("Total chars: " + str(total_chars))
|
print("Total chars: " + str(total_chars))
|
||||||
print("Total words: " + str(total_words))
|
print("Total words: " + str(total_words))
|
||||||
print("Total lines: " + str(total_lines))
|
print("Total lines: " + str(total_lines))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user