report handle
This commit is contained in:
parent
3ad4c920f6
commit
70d5181311
13
README
13
README
@ -1,9 +1,14 @@
|
||||
Usage: archive_nettime.py [options]
|
||||
Usage: archive.py [options]
|
||||
|
||||
Options:
|
||||
-h, --help show this help message and exit
|
||||
-u URL, --url=URL nettime url
|
||||
-l LIST, --list=LIST nettime's list name (ex: nettime-l)
|
||||
-a ARCH, --arch=ARCH path to archive directory
|
||||
(default='http://www.nettime.org/archives.php')
|
||||
-l LIST, --list=LIST nettime's list name (default=nettime-l)
|
||||
-a ARCH, --arch=ARCH path to archives directory (default='archives')
|
||||
|
||||
Dependencies: bs4
|
||||
Dependencies: bs4
|
||||
|
||||
---
|
||||
|
||||
|
||||
@ -248,5 +248,3 @@ def write_mbox_message(msg, mbox):
|
||||
for f in msg['follow-up']:
|
||||
write_mbox_message(f, mbox)
|
||||
|
||||
|
||||
|
||||
|
||||
174
nettime/report.py
Normal file
174
nettime/report.py
Normal file
@ -0,0 +1,174 @@
|
||||
import query
|
||||
import format
|
||||
import plot
|
||||
|
||||
class Report:
    """Produce statistics, plots and text/HTML tables from a nettime query.

    The statistics matrix is computed on first use by
    ``matrix_msgs_threads`` and cached on the instance in ``matrix``; the
    plot/tab/html helpers that need it call that method first.
    """

    # Query object handed to the constructor.
    query = None
    # Cached statistics matrix; None until matrix_msgs_threads() has run.
    matrix = None

    def __init__(self, q=None):
        """Store the query this report is built from.

        q -- must be an instance of query.Query.

        Raises TypeError (an Exception subclass, so existing
        ``except Exception`` callers still work) when q has the wrong type.
        """
        # Local import: this module's top-level imports do not bring in
        # logging, so the error path would otherwise fail with NameError.
        import logging

        if not isinstance(q, query.Query):
            msg = "Report constructor Error: query must be of type nettime.query.Query"
            logging.error(msg)
            raise TypeError(msg)

        self.query = q

    #
    # (basic) stats
    #

    def matrix_msgs_threads(self):
        """Return the statistics matrix, computing and caching it once.

        Columns added on top of ``query.activity_overall()``:
        'nbr-threads', 'nbr-replies', 'nbr-single-messages',
        'avg--per-msg' and 'avg-rep-per-thrd'.
        """
        if self.matrix is not None:
            return self.matrix

        # nbr messages
        mat = self.query.activity_overall()

        # nbr threads
        mat['nbr-threads'] = self.query.threads_overall(aggregate='count')['nbr-threads']

        # nbr replies
        mat['nbr-replies'] = self.query.threads_overall(aggregate='sum')['nbr-references']

        # nbr non-replies (aka. non-threads)
        mat['nbr-single-messages'] = mat['nbr-messages'] - mat['nbr-replies'] - mat['nbr-threads']

        # threads per message (the key name is kept as-is: other code
        # looks this column up by the literal 'avg--per-msg')
        mat['avg--per-msg'] = mat['nbr-threads'] / mat['nbr-messages']

        # avg. replies per thread
        mat['avg-rep-per-thrd'] = mat['nbr-replies'] / mat['nbr-threads']

        self.matrix = mat
        return self.matrix

    #
    # plots
    #

    def plot_nbr_msgs(self, title='Nbr. Messages', label='messages', color='mediumblue'):
        """Bar-plot the 'nbr-messages' column; returns plot.bar_plot_series' result."""
        self.matrix_msgs_threads()
        return plot.bar_plot_series(self.matrix['nbr-messages'].to_frame(label), title=title, color=color)

    def plot_nbr_threads(self, title='Nbr. Threads', label='threads', color='crimson'):
        """Bar-plot the 'nbr-threads' column; returns plot.bar_plot_series' result."""
        self.matrix_msgs_threads()
        return plot.bar_plot_series(self.matrix['nbr-threads'].to_frame(label), title=title, color=color)

    def plot_nbr_replies(self, title='Nbr. Replies in Threads', label='replies', color='dimgray'):
        """Bar-plot the 'nbr-replies' column; returns plot.bar_plot_series' result."""
        self.matrix_msgs_threads()
        return plot.bar_plot_series(self.matrix['nbr-replies'].to_frame(label), title=title, color=color)

    def plot_avg_rep_p_msg(self, title='Avg. Thread per Message', label='replies-per-messasges', color='limegreen'):
        """Bar-plot the 'avg--per-msg' column; returns plot.bar_plot_series' result."""
        self.matrix_msgs_threads()
        return plot.bar_plot_series(self.matrix['avg--per-msg'].to_frame(label), title=title, color=color)

    def plot_avg_rep_p_thrd(self, title='Avg. Replies per Thread', label='replies-per-thread', color='blueviolet'):
        """Bar-plot the 'avg-rep-per-thrd' column; returns plot.bar_plot_series' result."""
        self.matrix_msgs_threads()
        return plot.bar_plot_series(self.matrix['avg-rep-per-thrd'].to_frame(label), title=title, color=color)

    def plot_msgs_replies(self, title='Nbr. Messages segments (individual messages vs thread replies)'):
        """Bar-plot single messages, threads and replies side by side."""
        self.matrix_msgs_threads()
        return plot.bar_plot_series(self.matrix[['nbr-single-messages', 'nbr-threads', 'nbr-replies']], color=['mediumblue', 'red', 'dimgray'], title=title)

    #
    # text (tabular)
    #

    def tab_msgs_threads_replies(self):
        """Render messages / threads / replies counts via format.Tab."""
        self.matrix_msgs_threads()
        return format.Tab.from_dataframe(
            self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
            name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})

    def tab_avg_rep_msg_thrd(self):
        """Render the two average columns via format.Tab."""
        self.matrix_msgs_threads()
        return format.Tab.from_dataframe(
            self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
            name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})

    def tab_activity_from_ranking(self, rank=5):
        """Render query.activity_from_ranking(rank) via format.Tab."""
        d = self.query.activity_from_ranking(rank=rank)
        return format.Tab.from_dataframe(d, name_map={'nbr-messages': 'messages'})

    def tab_content_length_from_ranking(self, rank=5):
        """Render a ranking table with 'nbr-bytes' relabelled as 'bytes'."""
        # NOTE(review): this queries activity_from_ranking although the
        # method name and the 'nbr-bytes' mapping suggest a content-length
        # query was intended -- confirm against nettime.query.Query.
        d = self.query.activity_from_ranking(rank=rank)
        return format.Tab.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})

    def tab_threads_ranking(self, rank=5):
        """Render query.threads_ranking(rank) via format.Tab."""
        d = self.query.threads_ranking(rank=rank)
        return format.Tab.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'})

    def tab_threads_ranking_year(self, rank=5, resolution='y'):
        """Render one threads-ranking table per key of the query result.

        The result of query.threads_ranking(rank, resolution) is iterated
        in sorted key order and indexed by key. Each section is a
        'year: <key>' line followed by the table; a final newline is
        appended.
        """
        d = self.query.threads_ranking(rank=rank, resolution=resolution)
        nl = '\n'
        parts = []
        for year in sorted(d):
            # str() keeps the concatenation valid when keys are not strings.
            parts.append('year: ' + str(year) + nl)
            parts.append(format.Tab.from_dataframe(d[year], name_map={'nbr-references': 'nbr. replies'}) + nl)
        return ''.join(parts) + nl

    #
    # html
    #

    # m-t-r
    def html_msgs_threads_replies(self):
        """Render messages / threads / replies counts via format.Html."""
        self.matrix_msgs_threads()
        return format.Html.from_dataframe(
            self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
            name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})

    # a-r-m-t
    def html_avg_rep_msg_thrd(self):
        """Render the two average columns via format.Html."""
        self.matrix_msgs_threads()
        return format.Html.from_dataframe(
            self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
            name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})

    # a-f-r
    def html_activity_from_ranking(self, rank=5):
        """Return format.Html(self.query).threads_ranking(rank)."""
        # NOTE(review): delegates to threads_ranking although the method is
        # named after the activity ranking -- confirm which one is intended.
        html = format.Html(self.query)
        return html.threads_ranking(rank=rank)

    # c-l-f-r
    def html_content_length_from_ranking(self, rank=5):
        """Render a ranking table with 'nbr-bytes' relabelled as 'bytes'."""
        # NOTE(review): queries activity_from_ranking, same doubt as the
        # tabular variant -- confirm against nettime.query.Query.
        d = self.query.activity_from_ranking(rank=rank)
        return format.Html.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})

    # t-r
    def html_threads_ranking(self, rank=5):
        """Render query.threads_ranking(rank) via format.Html, linking subjects to urls."""
        d = self.query.threads_ranking(rank=rank)
        return format.Html.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'})

    # t-r-y
    def html_threads_ranking_year(self, rank=5, resolution='y'):
        """Render one HTML threads-ranking table per key of the query result.

        Each section is a '<div class="year_t">' heading followed by the
        table, in sorted key order; a final newline is appended.
        """
        d = self.query.threads_ranking(rank=rank, resolution=resolution)
        nl = '\n'
        parts = []
        for year in sorted(d):
            # str() keeps the concatenation valid when keys are not strings.
            parts.append('<div class="year_t">' + str(year) + '</div>' + nl)
            parts.append(format.Html.from_dataframe(d[year], name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'}) + nl)
        return ''.join(parts) + nl
|
||||
233
report.py
233
report.py
@ -1,182 +1,103 @@
|
||||
# --- standard library ---
import sys
import os
import json
import logging
from optparse import OptionParser

# Python 2 idiom: re-expose sys.setdefaultencoding and make utf8 the default.
reload(sys)
sys.setdefaultencoding('utf8')

# NOTE(review): no logging configuration is visible in this script; with
# the logging module's defaults these info-level progress messages may not
# be displayed -- confirm.
logging.info('1/4 setting up matplotlib')
# matplotlib views/windows: switch to interactive mode
import matplotlib
import matplotlib.pyplot as plt
matplotlib.interactive(True)

logging.info('2/4 setting up pandas')
# pandas display: allow up to 100 characters per column
import pandas as pd
pd.set_option('display.max_colwidth', 100)

logging.info('3/4 loading nettime archive')
# project modules
import nettime.archive
import nettime.query
import nettime.format
import nettime.plot
import nettime.report
|
||||
|
||||
class Report:
    """Produce statistics, plots and text/HTML tables from a nettime query.

    The statistics matrix is computed on first use by
    ``matrix_msgs_threads`` and cached on the instance in ``matrix``; the
    plot/tab/html helpers that need it call that method first.
    """

    # Query object handed to the constructor.
    query = None
    # Cached statistics matrix; None until matrix_msgs_threads() has run.
    matrix = None

    def __init__(self, q=None):
        """Store the query this report is built from.

        q -- must be an instance of nettime.query.Query.

        Raises TypeError (an Exception subclass, so existing
        ``except Exception`` callers still work) when q has the wrong type.
        """
        if not isinstance(q, nettime.query.Query):
            msg = "Report constructor Error: query must be of type nettime.query.Query"
            logging.error(msg)
            raise TypeError(msg)

        self.query = q

    #
    # (basic) stats
    #

    def matrix_msgs_threads(self):
        """Return the statistics matrix, computing and caching it once.

        Columns added on top of ``query.activity_overall()``:
        'nbr-threads', 'nbr-replies', 'nbr-single-messages',
        'avg--per-msg' and 'avg-rep-per-thrd'.
        """
        if self.matrix is not None:
            return self.matrix

        # nbr messages
        mat = self.query.activity_overall()

        # nbr threads
        mat['nbr-threads'] = self.query.threads_overall(aggregate='count')['nbr-threads']

        # nbr replies
        mat['nbr-replies'] = self.query.threads_overall(aggregate='sum')['nbr-references']

        # nbr non-replies (aka. non-threads)
        mat['nbr-single-messages'] = mat['nbr-messages'] - mat['nbr-replies'] - mat['nbr-threads']

        # threads per message (the key name is kept as-is: other code
        # looks this column up by the literal 'avg--per-msg')
        mat['avg--per-msg'] = mat['nbr-threads'] / mat['nbr-messages']

        # avg. replies per thread
        mat['avg-rep-per-thrd'] = mat['nbr-replies'] / mat['nbr-threads']

        self.matrix = mat
        return self.matrix

    #
    # plots
    #

    def plot_nbr_msgs(self, title='Nbr. Messages', label='messages', color='mediumblue'):
        """Bar-plot the 'nbr-messages' column; returns bar_plot_series' result."""
        self.matrix_msgs_threads()
        return nettime.plot.bar_plot_series(self.matrix['nbr-messages'].to_frame(label), title=title, color=color)

    def plot_nbr_threads(self, title='Nbr. Threads', label='threads', color='crimson'):
        """Bar-plot the 'nbr-threads' column; returns bar_plot_series' result."""
        self.matrix_msgs_threads()
        return nettime.plot.bar_plot_series(self.matrix['nbr-threads'].to_frame(label), title=title, color=color)

    def plot_nbr_replies(self, title='Nbr. Replies in Threads', label='replies', color='dimgray'):
        """Bar-plot the 'nbr-replies' column; returns bar_plot_series' result."""
        self.matrix_msgs_threads()
        return nettime.plot.bar_plot_series(self.matrix['nbr-replies'].to_frame(label), title=title, color=color)

    def plot_avg_rep_p_msg(self, title='Avg. Thread per Message', label='replies-per-messasges', color='limegreen'):
        """Bar-plot the 'avg--per-msg' column; returns bar_plot_series' result."""
        self.matrix_msgs_threads()
        return nettime.plot.bar_plot_series(self.matrix['avg--per-msg'].to_frame(label), title=title, color=color)

    def plot_avg_rep_p_thrd(self, title='Avg. Replies per Thread', label='replies-per-thread', color='blueviolet'):
        """Bar-plot the 'avg-rep-per-thrd' column; returns bar_plot_series' result."""
        self.matrix_msgs_threads()
        return nettime.plot.bar_plot_series(self.matrix['avg-rep-per-thrd'].to_frame(label), title=title, color=color)

    def plot_msgs_replies(self, title='Nbr. Messages segments (individual messages vs thread replies)'):
        """Bar-plot single messages, threads and replies side by side."""
        self.matrix_msgs_threads()
        return nettime.plot.bar_plot_series(self.matrix[['nbr-single-messages', 'nbr-threads', 'nbr-replies']], color=['mediumblue', 'red', 'dimgray'], title=title)

    #
    # text (tabular)
    #

    def tab_msgs_threads_replies(self):
        """Render messages / threads / replies counts via nettime.format.Tab."""
        self.matrix_msgs_threads()
        return nettime.format.Tab.from_dataframe(
            self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
            name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})

    def tab_avg_rep_msg_thrd(self):
        """Render the two average columns via nettime.format.Tab."""
        self.matrix_msgs_threads()
        return nettime.format.Tab.from_dataframe(
            self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
            name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})

    def tab_activity_from_ranking(self, rank=5):
        """Render query.activity_from_ranking(rank) via nettime.format.Tab."""
        d = self.query.activity_from_ranking(rank=rank)
        return nettime.format.Tab.from_dataframe(d, name_map={'nbr-messages': 'messages'})

    def tab_content_length_from_ranking(self, rank=5):
        """Render a ranking table with 'nbr-bytes' relabelled as 'bytes'."""
        # NOTE(review): this queries activity_from_ranking although the
        # method name and the 'nbr-bytes' mapping suggest a content-length
        # query was intended -- confirm against nettime.query.Query.
        d = self.query.activity_from_ranking(rank=rank)
        return nettime.format.Tab.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})

    def tab_threads_ranking(self, rank=5):
        """Render query.threads_ranking(rank) via nettime.format.Tab."""
        d = self.query.threads_ranking(rank=rank)
        return nettime.format.Tab.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'})

    def tab_threads_ranking_year(self, rank=5, resolution='y'):
        """Render one threads-ranking table per key of the query result.

        The result of query.threads_ranking(rank, resolution) is iterated
        in sorted key order and indexed by key. Each section is a
        'year: <key>' line followed by the table; a final newline is
        appended.
        """
        d = self.query.threads_ranking(rank=rank, resolution=resolution)
        nl = '\n'
        parts = []
        for year in sorted(d):
            # str() keeps the concatenation valid when keys are not strings.
            parts.append('year: ' + str(year) + nl)
            parts.append(nettime.format.Tab.from_dataframe(d[year], name_map={'nbr-references': 'nbr. replies'}) + nl)
        return ''.join(parts) + nl

    #
    # html
    #

    def html_msgs_threads_replies(self):
        """Render messages / threads / replies counts via nettime.format.Html."""
        self.matrix_msgs_threads()
        return nettime.format.Html.from_dataframe(
            self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
            name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})

    def html_avg_rep_msg_thrd(self):
        """Render the two average columns via nettime.format.Html."""
        self.matrix_msgs_threads()
        return nettime.format.Html.from_dataframe(
            self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
            name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})

    def html_activity_from_ranking(self, rank=5):
        """Return nettime.format.Html(self.query).threads_ranking(rank)."""
        # NOTE(review): delegates to threads_ranking although the method is
        # named after the activity ranking -- confirm which one is intended.
        html = nettime.format.Html(self.query)
        return html.threads_ranking(rank=rank)

    def html_content_length_from_ranking(self, rank=5):
        """Render a ranking table with 'nbr-bytes' relabelled as 'bytes'."""
        # NOTE(review): queries activity_from_ranking, same doubt as the
        # tabular variant -- confirm against nettime.query.Query.
        d = self.query.activity_from_ranking(rank=rank)
        return nettime.format.Html.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})

    def html_threads_ranking(self, rank=5):
        """Render query.threads_ranking(rank) as HTML, linking subjects to urls."""
        d = self.query.threads_ranking(rank=rank)
        return nettime.format.Html.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'})

    def html_threads_ranking_year(self, rank=5, resolution='y'):
        """Render one HTML threads-ranking table per key of the query result.

        Each section is a '<div class="year_t">' heading followed by the
        table, in sorted key order; a final newline is appended.
        """
        d = self.query.threads_ranking(rank=rank, resolution=resolution)
        nl = '\n'
        parts = []
        for year in sorted(d):
            # str() keeps the concatenation valid when keys are not strings.
            parts.append('<div class="year_t">' + str(year) + '</div>' + nl)
            parts.append(nettime.format.Html.from_dataframe(d[year], name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'}) + nl)
        return ''.join(parts) + nl


# Script-level objects that were interleaved with the class lines in this
# span; `r` is the report instance the text()/html() dispatchers call into.
a = nettime.archive.Archive('nettime-l_2016-12-31.json.gz')
q = nettime.query.Query(a)
r = nettime.report.Report(q)

logging.info('4/4 reporting')
|
||||
def text(command, params=None):
    """Run the tabular-text report called `command` on the module-level `r`.

    command -- one of the tab_* names listed below; any other value raises
               KeyError from the lookup.
    params  -- accepted but not used.

    Prints the command name and the resolved bound method before calling
    it, and returns whatever that method returns.
    """
    print(command)

    names = (
        "tab_msgs_threads_replies",
        "tab_avg_rep_msg_thrd",
        "tab_activity_from_ranking",
        "tab_content_length_from_ranking",
        "tab_threads_ranking",
        "tab_threads_ranking_year",
    )
    # Resolve every handler up front, exactly like a literal dispatch table.
    dispatch = dict((name, getattr(r, name)) for name in names)

    handler = dispatch[command]
    print(handler)

    return handler()
|
||||
|
||||
def html(command, params=None):
    """Run the HTML report called `command` on the module-level `r`.

    command -- one of the html_* names listed below; any other value raises
               KeyError from the lookup.
    params  -- accepted but not used.

    Returns whatever the selected report method returns.
    """
    names = (
        "html_msgs_threads_replies",
        "html_avg_rep_msg_thrd",
        "html_activity_from_ranking",
        "html_content_length_from_ranking",
        "html_threads_ranking",
        "html_threads_ranking_year",
    )
    # Resolve every handler up front, exactly like a literal dispatch table.
    dispatch = dict((name, getattr(r, name)) for name in names)

    handler = dispatch[command]
    return handler()
|
||||
|
||||
def run(options):
    """Fill the output file in place using the commands of the input script.

    options.output_file  -- path of an existing file; its content is read,
                            patched and written back to the same path.
    options.input_script -- path of a JSON file holding a list of commands,
                            each with 'format', 'command' and 'replace' keys.

    For every command whose 'format' is 'html' or 'text' the matching
    dispatcher is called with cmd['command']; a non-None result replaces
    every occurrence of cmd['replace'] in the content. Commands with any
    other format are skipped. Returns None; prints a message and returns
    early when either path is missing or is not a file.
    """
    if not (options.output_file and os.path.isfile(options.output_file)):
        print('No output-file. Nothing to do.')
        return
    with open(options.output_file, 'r') as fp:
        out = fp.read()  # whole file in memory: not optimal but will do

    if not (options.input_script and os.path.isfile(options.input_script)):
        print('No input-script. Nothing to do.')
        return
    with open(options.input_script, 'r') as fp:
        input_script = json.load(fp)

    for cmd in input_script:
        fmt = cmd['format']
        if fmt not in ('html', 'text'):
            continue

        # Look the dispatcher up only once the format is known to match.
        func = html if fmt == 'html' else text
        res = func(cmd['command'])

        if res is not None:
            out = out.replace(cmd['replace'], res)

    with open(options.output_file, 'w') as fp:
        fp.write(out)  # single write of the patched content
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Command line: -i/--input-script and -o/--output-file, both plain
    # string options; positional arguments are parsed but ignored.
    parser = OptionParser()
    for short_flag, long_flag in (('-i', '--input-script'), ('-o', '--output-file')):
        parser.add_option(short_flag, long_flag, action="store", help="..")

    options, _ignored_args = parser.parse_args()

    run(options)
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user