List_server_busy/stats.py

117 lines
2.6 KiB
Python
Raw Normal View History

2019-12-31 17:54:52 +01:00
import argparse, os, sys, glob, re, json, hashlib, logging
from datetime import datetime
import config
import collections
2019-11-27 15:45:18 +01:00
# Name of an archives directory. Not referenced by the code visible in this
# file: run() resolves list names against config.archives instead.
ARCH = "archives/"
2019-12-31 17:54:52 +01:00
# Digests (as produced by hash()) of every message report_msg() has already
# counted. Kept as a set so the duplicate check is a constant-time lookup
# instead of a linear scan over everything seen so far.
hashes = set()


def hash(m):
    """Return a SHA-256 hex digest identifying message *m*.

    The digest is computed over the message's 'from', 'subject' and 'date'
    values, concatenated in that order without separators and encoded as
    UTF-8. Note that this function shadows the built-in ``hash`` inside
    this module.

    Raises:
        KeyError: if *m* lacks one of the three keys.
    """
    identity = m['from'] + m['subject'] + m['date']
    return hashlib.sha256(identity.encode("utf-8")).hexdigest()


def report_msg(msg):
    """Count the characters, words and lines of *msg* and its follow-ups.

    A message whose digest is already in ``hashes`` is skipped silently;
    otherwise its digest is recorded before counting. Counts are taken from
    ``msg["content"]``:

    * chars -- length of the content string,
    * words -- number of ``\\w+`` runs in the content,
    * lines -- number of '\\n'-separated segments (so empty content still
      counts as one line).

    Messages listed under the optional 'follow-up' key are processed
    recursively and their counts are added to the result; follow-ups that
    turn out to be duplicates contribute nothing.

    Returns:
        A ``(chars, words, lines)`` tuple, or ``None`` if *msg* itself is a
        duplicate of a message counted earlier.
    """
    digest = hash(msg)
    if digest in hashes:
        return None
    # Only mutated, never rebound, so no ``global`` statement is required.
    hashes.add(digest)

    content = msg["content"]
    chars = len(content)
    words = len(re.findall(r'\w+', content))
    lines = len(content.split('\n'))

    if 'follow-up' in msg:
        for follow_up in msg['follow-up']:
            counts = report_msg(follow_up)
            if counts is not None:
                chars += counts[0]
                words += counts[1]
                lines += counts[2]

    return (chars, words, lines)
def year_filename(fn):
    """Return the year encoded in an archive file name.

    *fn* must be a bare file name of the form ``<Month>_<Year>.json`` with a
    full month name, e.g. ``"March_2019.json"``; the year is returned as an
    int. ``datetime.strptime`` raises ``ValueError`` for any other shape.
    """
    parsed = datetime.strptime(fn, "%B_%Y.json")
    return parsed.year
2019-11-27 15:45:18 +01:00
def run(l):
    """Print per-year and total character/word/line counts for one archive.

    *l* is a mailing-list archive directory; unless it already starts with
    ``config.archives`` it is joined onto that prefix. Every ``*.json`` file
    in the directory is loaded, each entry of its 'threads' list is passed
    to report_msg(), and the resulting counts are accumulated under the year
    parsed from the file name by year_filename(). The report is written to
    standard output.

    Exits the process via ``sys.exit`` with a message if the resolved path
    is not a directory.
    """
    archive_dir = l
    if not archive_dir.startswith(config.archives):
        archive_dir = os.path.join(config.archives, archive_dir)

    if not os.path.isdir(archive_dir):
        sys.exit(archive_dir + ' is not a valid archive. Aborting.')

    report = {}
    for path in glob.glob(os.path.join(archive_dir, "*.json")):
        with open(path) as fp:
            data = json.load(fp)

        for thread in data['threads']:
            counts = report_msg(thread)
            year = year_filename(os.path.basename(path))
            # The year gets an entry even when the thread is a duplicate,
            # so it still shows up (possibly with zeros) in the report.
            bucket = report.setdefault(year, {'chars': 0, 'words': 0, 'lines': 0})
            if counts is None:
                continue
            bucket['chars'] += counts[0]
            bucket['words'] += counts[1]
            bucket['lines'] += counts[2]

    print("\n\n" + archive_dir)

    # Years are unique dict keys, so sorting the items orders by year only.
    by_year = sorted(report.items())
    sections = (
        ('chars', "Number of written characters per year:"),
        ('words', "Number of written words per year:"),
        ('lines', "Number of written lines per year:"),
    )
    totals = {}
    for key, heading in sections:
        print(heading)
        running = 0
        for year, counts in by_year:
            print(" " + str(year) + ": " + str(counts[key]))
            running += counts[key]
        totals[key] = running

    print("Total chars: " + str(totals['chars']))
    print("Total words: " + str(totals['words']))
    print("Total lines: " + str(totals['lines']))
if __name__ == "__main__":
    # Command-line entry point: print the statistics report for every
    # mailing-list archive named on the command line.
    parser = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
    # nargs="+" makes argparse itself reject an invocation without any list
    # (usage error, exit status 2), so no manual emptiness check is needed.
    parser.add_argument('list', metavar="list", help="mailinglist to analyse", nargs="+")
    args = parser.parse_args()

    for name in args.list:
        run(name)

    print("\n\n . . . . ")