diff --git a/README b/README
index d9e8043..203677f 100644
--- a/README
+++ b/README
@@ -1,9 +1,14 @@
-Usage: archive_nettime.py [options]
+Usage: archive.py [options]
Options:
-h, --help show this help message and exit
-u URL, --url=URL nettime url
- -l LIST, --list=LIST nettime's list name (ex: nettime-l)
- -a ARCH, --arch=ARCH path to archive directory
+ (default='http://www.nettime.org/archives.php')
+ -l LIST, --list=LIST nettime's list name (default=nettime-l)
+ -a ARCH, --arch=ARCH path to archives directory (default='archives')
- Dependencies: bs4
\ No newline at end of file
+ Dependencies: bs4
+
+ ---
+
+
\ No newline at end of file
diff --git a/nettime/mhonarccrawl.py b/nettime/mhonarccrawl.py
index 7500425..192a0b2 100644
--- a/nettime/mhonarccrawl.py
+++ b/nettime/mhonarccrawl.py
@@ -248,5 +248,3 @@ def write_mbox_message(msg, mbox):
for f in msg['follow-up']:
write_mbox_message(f, mbox)
-
-
diff --git a/nettime/report.py b/nettime/report.py
new file mode 100644
index 0000000..c69e15d
--- /dev/null
+++ b/nettime/report.py
@@ -0,0 +1,174 @@
+import query
+import format
+import plot
+
+class Report:
+
+ query = None
+ matrix = None
+
+ def __init__(self, q=None):
+
+ if not isinstance(q, query.Query):
+ logging.error("HtmlFormat constructor Error: query must be of type nettime.query.Query")
+ raise Exception()
+
+ self.query = q
+
+ '''
+ (basic) stats
+ '''
+
+ def matrix_msgs_threads(self):
+
+ if self.matrix is None:
+
+ # nbr messages
+ mat = self.query.activity_overall()
+
+ # nbr threads
+ mat['nbr-threads'] = self.query.threads_overall(aggregate='count')['nbr-threads']
+
+ # nbr replies
+ mat['nbr-replies'] = self.query.threads_overall(aggregate='sum')['nbr-references']
+
+ # nbr non-replies (aka. non-threads)
+ mat['nbr-single-messages'] = mat['nbr-messages'] - mat['nbr-replies'] - mat['nbr-threads']
+
+ # avg. rep per message
+ mat['avg--per-msg'] = mat['nbr-threads'] / mat['nbr-messages']
+
+ # avg. rep per thread
+ mat['avg-rep-per-thrd'] = mat['nbr-replies'] / mat['nbr-threads']
+ # same as:
+ # mat['avg-rep-per-thrd'] = q.threads_overall(aggregate='mean')['nbr-references']
+
+ self.matrix = mat
+
+ return self.matrix
+
+ '''
+ plots
+ '''
+
+ def plot_nbr_msgs(self, title='Nbr. Messages', label='messages', color='mediumblue'):
+
+ self.matrix_msgs_threads()
+
+ return plot.bar_plot_series(self.matrix['nbr-messages'].to_frame(label), title=title, color=color)
+
+ def plot_nbr_threads(self, title='Nbr. Threads', label='threads', color='crimson'):
+
+ self.matrix_msgs_threads()
+
+ return plot.bar_plot_series(self.matrix['nbr-threads'].to_frame(label), title=title, color=color)
+
+ def plot_nbr_replies(self, title='Nbr. Replies in Threads', label='replies', color='dimgray'):
+
+ self.matrix_msgs_threads()
+
+ return plot.bar_plot_series(self.matrix['nbr-replies'].to_frame(label), title=title, color=color)
+
+ def plot_avg_rep_p_msg(self, title='Avg. Thread per Message', label='replies-per-messasges', color='limegreen'):
+
+ self.matrix_msgs_threads()
+
+ return plot.bar_plot_series(self.matrix['avg--per-msg'].to_frame(label), title=title, color=color)
+
+ def plot_avg_rep_p_thrd(self, title='Avg. Replies per Thread', label='replies-per-thread', color='blueviolet'):
+
+ self.matrix_msgs_threads()
+
+ return plot.bar_plot_series(self.matrix['avg-rep-per-thrd'].to_frame(label), title=title, color=color)
+
+ def plot_msgs_replies(self, title='Nbr. Messages segments (individual messages vs thread replies)'):
+
+ self.matrix_msgs_threads()
+
+ return plot.bar_plot_series(self.matrix[['nbr-single-messages', 'nbr-threads', 'nbr-replies']], color=['mediumblue', 'red', 'dimgray'], title=title)
+
+ '''
+ text (tabular)
+ '''
+
+ def tab_msgs_threads_replies(self):
+ self.matrix_msgs_threads()
+ return format.Tab.from_dataframe(self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
+ name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})
+
+ def tab_avg_rep_msg_thrd(self):
+ self.matrix_msgs_threads()
+ return format.Tab.from_dataframe(self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
+ name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})
+
+ def tab_activity_from_ranking(self, rank=5):
+ d = self.query.activity_from_ranking(rank=rank)
+ return format.Tab.from_dataframe(d, name_map={'nbr-messages': 'messages'})
+
+ def tab_content_length_from_ranking(self, rank=5):
+ d = self.query.activity_from_ranking(rank=rank)
+ return format.Tab.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})
+
+ def tab_threads_ranking(self, rank=5):
+ d = self.query.threads_ranking(rank=rank)
+ return format.Tab.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'})
+
+ def tab_threads_ranking_year(self, rank=5, resolution='y'):
+ d = self.query.threads_ranking(rank=rank, resolution=resolution)
+ years = sorted(d)
+ nl = '\n'
+ s = ""
+ for i in years:
+ s += 'year: ' + i + nl
+ s += format.Tab.from_dataframe(d[i], name_map={'nbr-references': 'nbr. replies'}) + nl
+ return s + nl
+
+ '''
+ html
+ '''
+
+ '''
+ m-t-r
+ '''
+ def html_msgs_threads_replies(self):
+ self.matrix_msgs_threads()
+ return format.Html.from_dataframe(self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
+ name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})
+ '''
+ a-r-m-t
+ '''
+ def html_avg_rep_msg_thrd(self):
+ self.matrix_msgs_threads()
+ return format.Html.from_dataframe(self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
+ name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})
+ '''
+ a-f-r
+ '''
+ def html_activity_from_ranking(self, rank=5):
+ html = format.Html(self.query)
+ return html.threads_ranking(rank=rank)
+ '''
+ c-l-f-r
+ '''
+ def html_content_length_from_ranking(self, rank=5):
+ d = self.query.activity_from_ranking(rank=rank)
+ return format.Html.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})
+ '''
+ t-r
+ '''
+ def html_threads_ranking(self, rank=5):
+ d = self.query.threads_ranking(rank=rank)
+ return format.Html.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'})
+
+ '''
+ t-r-y
+ '''
+ def html_threads_ranking_year(self, rank=5, resolution='y'):
+ d = self.query.threads_ranking(rank=rank, resolution=resolution)
+ years = sorted(d)
+ nl = '\n'
+ s = ""
+ for i in years:
+ s += '
' + i + '
' + nl
+ s += format.Html.from_dataframe(d[i], name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'}) + nl
+ return s + nl
diff --git a/report.py b/report.py
index 0a2baac..0aee3ae 100644
--- a/report.py
+++ b/report.py
@@ -1,182 +1,103 @@
+import sys, os, json, logging
+from optparse import OptionParser
+
+reload(sys)
+sys.setdefaultencoding('utf8')
+
+logging.info('1/4 setting up matplotlib')
+# matplot view/windows
+import matplotlib
+import matplotlib.pyplot as plt
+matplotlib.interactive(True)
+
+logging.info('2/4 setting up pandas')
+# pd display
+import pandas as pd
+pd.set_option('display.max_colwidth', 100)
+
+logging.info('3/4 loading nettime archive')
+import nettime.archive
import nettime.query
-import nettime.format
-import nettime.plot
+import nettime.report
-class Report:
+a = nettime.archive.Archive('nettime-l_2016-12-31.json.gz')
+q = nettime.query.Query(a)
+r = nettime.report.Report(q)
- query = None
- matrix = None
+logging.info('4/4 reporting')
- def __init__(self, q=None):
-
- if not isinstance(q, nettime.query.Query):
- logging.error("HtmlFormat constructor Error: query must be of type nettime.query.Query")
- raise Exception()
-
- self.query = q
-
- '''
- (basic) stats
- '''
-
- def matrix_msgs_threads(self):
-
- if self.matrix is None:
-
- # nbr messages
- mat = self.query.activity_overall()
-
- # nbr threads
- mat['nbr-threads'] = self.query.threads_overall(aggregate='count')['nbr-threads']
-
- # nbr replies
- mat['nbr-replies'] = self.query.threads_overall(aggregate='sum')['nbr-references']
-
- # nbr non-replies (aka. non-threads)
- mat['nbr-single-messages'] = mat['nbr-messages'] - mat['nbr-replies'] - mat['nbr-threads']
-
- # avg. rep per message
- mat['avg--per-msg'] = mat['nbr-threads'] / mat['nbr-messages']
-
- # avg. rep per thread
- mat['avg-rep-per-thrd'] = mat['nbr-replies'] / mat['nbr-threads']
- # same as:
- # mat['avg-rep-per-thrd'] = q.threads_overall(aggregate='mean')['nbr-references']
-
- self.matrix = mat
-
- return self.matrix
-
- '''
- plots
- '''
-
- def plot_nbr_msgs(self, title='Nbr. Messages', label='messages', color='mediumblue'):
-
- self.matrix_msgs_threads()
-
- return nettime.plot.bar_plot_series(self.matrix['nbr-messages'].to_frame(label), title=title, color=color)
-
- def plot_nbr_threads(self, title='Nbr. Threads', label='threads', color='crimson'):
-
- self.matrix_msgs_threads()
-
- return nettime.plot.bar_plot_series(self.matrix['nbr-threads'].to_frame(label), title=title, color=color)
-
- def plot_nbr_replies(self, title='Nbr. Replies in Threads', label='replies', color='dimgray'):
-
- self.matrix_msgs_threads()
-
- return nettime.plot.bar_plot_series(self.matrix['nbr-replies'].to_frame(label), title=title, color=color)
-
- def plot_avg_rep_p_msg(self, title='Avg. Thread per Message', label='replies-per-messasges', color='limegreen'):
-
- self.matrix_msgs_threads()
-
- return nettime.plot.bar_plot_series(self.matrix['avg--per-msg'].to_frame(label), title=title, color=color)
-
- def plot_avg_rep_p_thrd(self, title='Avg. Replies per Thread', label='replies-per-thread', color='blueviolet'):
-
- self.matrix_msgs_threads()
-
- return nettime.plot.bar_plot_series(self.matrix['avg-rep-per-thrd'].to_frame(label), title=title, color=color)
-
- def plot_msgs_replies(self, title='Nbr. Messages segments (individual messages vs thread replies)'):
-
- self.matrix_msgs_threads()
-
- return nettime.plot.bar_plot_series(self.matrix[['nbr-single-messages', 'nbr-threads', 'nbr-replies']], color=['mediumblue', 'red', 'dimgray'], title=title)
-
- '''
- text (tabular)
- '''
-
- def tab_msgs_threads_replies(self):
- self.matrix_msgs_threads()
- return nettime.format.Tab.from_dataframe(self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
- name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})
-
- def tab_avg_rep_msg_thrd(self):
- self.matrix_msgs_threads()
- return nettime.format.Tab.from_dataframe(self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
- name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})
-
- def tab_activity_from_ranking(self, rank=5):
- d = self.query.activity_from_ranking(rank=rank)
- return nettime.format.Tab.from_dataframe(d, name_map={'nbr-messages': 'messages'})
-
- def tab_content_length_from_ranking(self, rank=5):
- d = self.query.activity_from_ranking(rank=rank)
- return nettime.format.Tab.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})
-
- def tab_threads_ranking(self, rank=5):
- d = self.query.threads_ranking(rank=rank)
- return nettime.format.Tab.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'})
-
- def tab_threads_ranking_year(self, rank=5, resolution='y'):
- d = self.query.threads_ranking(rank=rank, resolution=resolution)
- years = sorted(d)
- nl = '\n'
- s = ""
- for i in years:
- s += 'year: ' + i + nl
- s += nettime.format.Tab.from_dataframe(d[i], name_map={'nbr-references': 'nbr. replies'}) + nl
- return s + nl
-
- '''
- html
- '''
-
- def html_msgs_threads_replies(self):
- self.matrix_msgs_threads()
- return nettime.format.Html.from_dataframe(self.matrix[['nbr-messages', 'nbr-threads', 'nbr-replies']],
- name_map={'nbr-messages': 'messages', 'nbr-threads': 'threads', 'nbr-replies': 'replies in threads'})
-
- def html_avg_rep_msg_thrd(self):
- self.matrix_msgs_threads()
- return nettime.format.Html.from_dataframe(self.matrix[['avg--per-msg', 'avg-rep-per-thrd']],
- name_map={'avg--per-msg': 'avg. thread per message', 'avg-rep-per-thrd': 'avg. replies per thread'})
-
- def html_activity_from_ranking(self, rank=5):
- html = nettime.format.Html(self.query)
- return html.threads_ranking(rank=rank)
-
- def html_content_length_from_ranking(self, rank=5):
- d = self.query.activity_from_ranking(rank=rank)
- return nettime.format.Html.from_dataframe(d, name_map={'nbr-bytes': 'bytes'})
-
- def html_threads_ranking(self, rank=5):
- d = self.query.threads_ranking(rank=rank)
- return nettime.format.Html.from_dataframe(d, name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'})
-
- def html_threads_ranking_year(self, rank=5, resolution='y'):
- d = self.query.threads_ranking(rank=rank, resolution=resolution)
- years = sorted(d)
- nl = '\n'
- s = ""
- for i in years:
- s += '' + i + '
' + nl
- s += nettime.format.Html.from_dataframe(d[i], name_map={'nbr-references': 'nbr. replies'}, url_map={'subject': 'url'}) + nl
- return s + nl
+def text(command, params=None):
+ print command
+ func = {
+ "tab_msgs_threads_replies": r.tab_msgs_threads_replies,
+ "tab_avg_rep_msg_thrd": r.tab_avg_rep_msg_thrd,
+ "tab_activity_from_ranking": r.tab_activity_from_ranking,
+ "tab_content_length_from_ranking": r.tab_content_length_from_ranking,
+ "tab_threads_ranking": r.tab_threads_ranking,
+ "tab_threads_ranking_year": r.tab_threads_ranking_year
+ }
+ print func[command]
+ return func[command]()
+def html(command, params=None):
+ func = {
+ "html_msgs_threads_replies": r.html_msgs_threads_replies,
+ "html_avg_rep_msg_thrd": r.html_avg_rep_msg_thrd,
+ "html_activity_from_ranking": r.html_activity_from_ranking,
+ "html_content_length_from_ranking": r.html_content_length_from_ranking,
+ "html_threads_ranking": r.html_threads_ranking,
+ "html_threads_ranking_year": r.html_threads_ranking_year
+ }
+ return func[command]()
+def run(options):
+ if options.output_file and os.path.isfile(options.output_file):
+ with open(options.output_file, 'r') as fp:
+ out = fp.read() # not optimal but will do
+ else:
+ print 'No output-file. Nothing to do.'
+ return
+ if options.input_script and os.path.isfile(options.input_script):
+ with open(options.input_script, 'r') as fp:
+ input_script = json.load(fp)
+ else:
+ print 'No input-script. Nothing to do.'
+ return
+ for cmd in input_script:
+ if cmd['format'] == 'html':
+ func = html
+ elif cmd['format'] == 'text':
+ func = text
+ else:
+ continue
+ res = func(cmd['command'])
+ if res is not None:
+ out = out.replace(cmd['replace'], res)
+ with open(options.output_file, 'w') as fp:
+ fp.write(out) # not optimal but will do
+if __name__ == "__main__":
+ p = OptionParser();
+ p.add_option('-i', '--input-script', action="store", help="..")
+ p.add_option('-o', '--output-file', action="store", help="..")
+ options, args = p.parse_args()
+ run(options)