reporting
This commit is contained in:
parent
9c6ea7e1be
commit
8a92b3b1be
5
list_all_files.py
Normal file
5
list_all_files.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Collect every Python source file below the current working directory
# (recursively) first, then print one path per line.
python_files = list(Path('.').rglob('*.py'))
for source_path in python_files:
    print(source_path)
|
||||||
187
report.py
Normal file
187
report.py
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
import argparse, os, glob, sys, json, re, logging, hashlib
|
||||||
|
import mysql.connector as mariadb
|
||||||
|
from report import listsreport
|
||||||
|
from archive import archive
|
||||||
|
import config
|
||||||
|
from datetime import datetime
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
# Digests of every message that has already been counted. Shared by all calls
# to report_msg() so a message appearing more than once is counted only once.
# A set is used so the membership test is O(1) instead of scanning a list.
hashes = set()


def hash(m):
    """Return a SHA-256 hex digest identifying message *m*.

    The digest is computed over the plain concatenation of the message's
    ``'from'``, ``'subject'`` and ``'date'`` values, encoded as UTF-8.

    NOTE: the function keeps its historical name, which shadows the ``hash``
    builtin inside this module.

    Args:
        m: mapping providing string values under 'from', 'subject' and 'date'.

    Returns:
        The hexadecimal digest as a ``str``.
    """
    identity = m['from'] + m['subject'] + m['date']
    return hashlib.sha256(identity.encode("utf-8")).hexdigest()


def report_msg(msg):
    """Count the characters, words and lines of *msg* and of its follow-ups.

    A message whose digest (see ``hash``) is already registered in the
    module-level ``hashes`` set is a duplicate: it is skipped silently and
    contributes nothing, including its follow-ups.

    Args:
        msg: mapping with string values under 'from', 'subject', 'date' and
            'content'; an optional 'follow-up' entry holds an iterable of
            nested messages of the same shape (a missing or ``None`` entry
            means "no follow-ups").

    Returns:
        A ``(chars, words, lines)`` tuple covering the message and all of its
        not-yet-seen follow-ups, or ``None`` when the message is a duplicate.
    """
    digest = hash(msg)
    if digest in hashes:
        return None
    hashes.add(digest)

    content = msg["content"]
    chars = len(content)
    words = len(re.findall(r'\w+', content))
    lines = len(content.split('\n'))

    follow_ups = msg['follow-up'] if 'follow-up' in msg else None
    for follow_up in follow_ups or ():
        nested = report_msg(follow_up)
        # Duplicated follow-ups return None and must not be added.
        if nested is not None:
            chars += nested[0]
            words += nested[1]
            lines += nested[2]

    return (chars, words, lines)
|
||||||
|
|
||||||
|
def year_filename(fn):
    """Extract the year from an archive file name.

    Args:
        fn: base file name in the form ``<MonthName>_<YYYY>.json``
            (parsed with the ``"%B_%Y.json"`` strptime format).

    Returns:
        The year as an ``int``.

    Raises:
        ValueError: if *fn* does not match the expected format.
    """
    parsed = datetime.strptime(fn, "%B_%Y.json")
    return parsed.year
|
||||||
|
|
||||||
|
|
||||||
|
def report_all(l):
    """Print per-year and total character/word/line counts for archive *l*.

    Every ``*.json`` file in the archive directory is loaded, each entry of
    its ``'threads'`` list is measured with ``report_msg`` and the result is
    accumulated under the year parsed from the file name by
    ``year_filename``. Messages that ``report_msg`` reports as duplicates
    (``None``) are not counted.

    Args:
        l: archive name or path. When it does not already start with
            ``config.archives`` it is joined onto that directory.

    Side effects:
        Prints the report to stdout; calls ``sys.exit`` with an error message
        when the resolved path is not a directory.
    """
    # Keep the name exactly as given: it prefixes every printed line.
    label = l

    if not l.startswith(config.archives):
        l = os.path.join(config.archives, l)

    if not os.path.isdir(l):
        sys.exit(l + ' is not a valid archive. Aborting.')

    report = {}

    for path in glob.glob(os.path.join(l, "*.json")):
        with open(path) as fp:
            data = json.load(fp)

        threads = data['threads']
        if not threads:
            # A file without threads contributes no year entry at all.
            continue

        # The year depends only on the file, so resolve it once per file
        # rather than once per thread.
        year = year_filename(os.path.basename(path))
        counts = report.setdefault(year, {'chars': 0, 'words': 0, 'lines': 0})

        for thread in threads:
            stats = report_msg(thread)
            if stats is not None:
                counts['chars'] += stats[0]
                counts['words'] += stats[1]
                counts['lines'] += stats[2]

    yearly = sorted(report.items())
    totals = {}

    # One section per metric, years in ascending order.
    sections = (('chars', "characters"), ('words', "words"), ('lines', "lines"))
    for key, noun in sections:
        print("[" + label + "] Number of written " + noun + " per year:")
        for year, counts in yearly:
            print(" " + str(year) + ": " + str(counts[key]))
        totals[key] = sum(counts[key] for _, counts in yearly)

    for key, _ in sections:
        print("[" + label + "] Total " + key + ": " + str(totals[key]))
|
||||||
|
|
||||||
|
|
||||||
|
def format_from(f):
    """Return an obfuscated form of the sender address *f*.

    The text before the first ``{at}`` marker (matched case-insensitively) has
    every ASCII letter and digit replaced by ``*``; the marker and everything
    after it are kept as-is and ``"; "`` is appended.

    Args:
        f: sender address string.

    Returns:
        The obfuscated address followed by ``"; "``, or an empty string when
        the marker is missing or sits at the very start of *f*.
    """
    marker_at = f.lower().find("{at}")
    if marker_at <= 0:
        return ""

    local_part, remainder = f[:marker_at], f[marker_at:]
    return re.sub('[0-9a-zA-Z]', '*', local_part) + remainder + "; "
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command line entry point: query the database for each requested list
    # and print the report as text (--txt) or as JSON (default).

    p = argparse.ArgumentParser(description='Mailinglists report')
    p.add_argument('lists', metavar="lists", help="lists to report", nargs="+")
    p.add_argument('--txt', '-t', default=False, help="output as text", action="store_true")

    args = p.parse_args()

    db_con = archive.connect_db(config.db['database'], config.db['host'], config.db['user'], config.db['password'])

    if db_con is None:
        logging.warning("Not connection to database...")
        sys.exit()

    # Defined before the try block so both stay bound even when creating the
    # cursor itself fails.
    report = {}
    cursor = None

    try:
        cursor = db_con.cursor(buffered=True)

        for li in args.lists:
            # Build the entry completely before publishing it: a database
            # error half-way through must not leave a partial entry that the
            # text output below would trip over.
            entry = {
                'contributors': listsreport.contributors(li, cursor),
                'contributors_year': listsreport.contributors(li, cursor, per_year=True),
                'text_size': listsreport.text_size(li, cursor),
                'text_size_year': listsreport.text_size(li, cursor, per_year=True),
                'contributions': listsreport.contributions(li, cursor),
                'contributions_year': listsreport.contributions(li, cursor, per_year=True),
            }
            report[li] = entry

    except mariadb.Error as error:
        logging.error("Error: {}".format(error))
    finally:
        if cursor is not None:
            cursor.close()

    if args.txt:

        for k, v in report.items():

            # contributions
            print("[" + k + "] Total contributions: " + v['contributions'])
            print("[" + k + "] Contributions per year:")
            for c in v['contributions_year']:
                print(" " + c['year'] + ": " + c['val'])

            # Text volume is computed from the archive files by report_all();
            # the database-side 'text_size' figures are only part of the JSON
            # output.
            report_all(k)

            # contributors
            print("[" + k + "] Total number of disctinct contributors' email address: " + str(len(v['contributors'])))
            print("[" + k + "] Cohort of contributors per year:")
            # Per-year *contributors* counts (this section previously repeated
            # the contributions figures).
            for c in v['contributors_year']:
                print(" " + c['year'] + ": " + c['val'])

            print("[" + k + "] List of contributors obfuscated addresses:")
            print("".join(format_from(c) for c in v['contributors']))

    else:
        print(json.dumps(report))
|
||||||
0
report/__init__.py
Normal file
0
report/__init__.py
Normal file
80
report/listsreport.py
Normal file
80
report/listsreport.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
import mysql.connector as mariadb
|
||||||
|
|
||||||
|
# Query templates. The single "{}" placeholder in each template is filled
# with the table name via str.format() by the functions of this module.

# --- contributors ---
CONTRIBUTORS_QUERY = "SELECT DISTINCT(from_) FROM {} "
CONTRIBUTORS_CNT_PER_YEAR_QUERY = "SELECT YEAR(date_) theyear, COUNT(DISTINCT(from_)) FROM {} GROUP BY theyear"

# --- contributions ---
CONTRIBUTIONS_QUERY = "SELECT COUNT(*) FROM {}"
CONTRIBUTIONS_PER_YEAR_QUERY = "SELECT YEAR(date_) theyear, COUNT(*) FROM {} GROUP BY theyear"

# --- size text ---
SIZE_TEXT_QUERY = "SELECT SUM(LENGTH(content_)) FROM {}"
SIZE_TEXT_PER_YEAR_QUERY = "SELECT YEAR(date_) theyear, SUM(LENGTH(content_)) FROM {} GROUP BY theyear"

# --- nbr words ---
# NOTE(review): this section used to re-assign SIZE_TEXT_QUERY and
# SIZE_TEXT_PER_YEAR_QUERY to the very same values as above; no dedicated
# word-count query is defined here.
|
||||||
|
|
||||||
|
def contributors(li, cursor, per_year=False):
    """Fetch the contributors of list *li*.

    NOTE(review): *li* is interpolated into the SQL text with str.format(),
    so it must be a trusted table name.

    Args:
        li: table name substituted into the query template.
        cursor: database cursor; it is executed and then iterated for rows.
        per_year: when true, return per-year counts of distinct senders
            instead of the sender values themselves.

    Returns:
        With ``per_year``: a list of ``{'year': str, 'val': str}`` dicts, one
        per result row. Otherwise: a flat list holding the column values of
        every result row (the distinct ``from_`` values).
    """
    if per_year:
        cursor.execute(CONTRIBUTORS_CNT_PER_YEAR_QUERY.format(li))
        return [{'year': str(year), 'val': str(val)} for (year, val) in cursor]

    cursor.execute(CONTRIBUTORS_QUERY.format(li))
    senders = []
    for row in cursor:
        # Each row is a sequence of column values; flatten it into the list.
        senders.extend(row)
    return senders
|
||||||
|
|
||||||
|
def contributions(li, cursor, per_year=False):
    """Fetch the number of contributions (rows) of list *li*.

    NOTE(review): *li* is interpolated into the SQL text with str.format(),
    so it must be a trusted table name.

    Args:
        li: table name substituted into the query template.
        cursor: database cursor; it is executed and then iterated for rows.
        per_year: when true, return one count per year instead of the total.

    Returns:
        With ``per_year``: a list of ``{'year': str, 'val': str}`` dicts, one
        per result row. Otherwise: the first column of the first row as a
        ``str``, or an empty list when the query produced no row.
    """
    template = CONTRIBUTIONS_PER_YEAR_QUERY if per_year else CONTRIBUTIONS_QUERY
    cursor.execute(template.format(li))

    if per_year:
        return [{'year': str(year), 'val': str(val)} for (year, val) in cursor]

    # Only the first row matters for the overall count.
    first_row = next(iter(cursor), None)
    if first_row is None:
        return []
    return str(first_row[0])
|
||||||
|
|
||||||
|
def text_size(li, cursor, per_year=False):
    """Fetch the summed content length of list *li*.

    NOTE(review): *li* is interpolated into the SQL text with str.format(),
    so it must be a trusted table name.

    Args:
        li: table name substituted into the query template.
        cursor: database cursor; it is executed and then iterated for rows.
        per_year: when true, return one sum per year instead of the total.

    Returns:
        With ``per_year``: a list of ``{'year': str, 'val': str}`` dicts, one
        per result row. Otherwise: the sum from the first row as a ``str``,
        or an empty list when the query produced no row. A NULL sum (nothing
        to add up) is reported as ``"0"`` rather than the string ``"None"``.
    """

    def _size_text(total):
        # SUM() yields NULL (None) when there are no non-NULL values.
        return str(0 if total is None else total)

    template = SIZE_TEXT_PER_YEAR_QUERY if per_year else SIZE_TEXT_QUERY
    cursor.execute(template.format(li))

    if per_year:
        return [{'year': str(year), 'val': _size_text(val)} for (year, val) in cursor]

    for row in cursor:
        # Only the first row matters for the overall sum.
        return _size_text(row[0])

    return []
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user