display and report
This commit is contained in:
parent
cb7e7825c0
commit
9a2badf32a
101
nettime/format.py
Normal file
101
nettime/format.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
import query
|
||||||
|
import logging, html
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
|
class Html:
    """Render query results as HTML ``<table>`` markup.

    Tables are built with the ``html.HTML`` builder imported at the top of
    this module and returned as strings.
    """

    # Query object the formatter pulls its data from; set by __init__.
    query = None

    def __init__(self, q=None):
        """Store the query object used by the rendering methods.

        q -- must be an instance of ``query.Query``.

        Raises TypeError (after logging an error) when ``q`` is of any
        other type, including the default ``None``.
        """
        if not isinstance(q, query.Query):
            logging.error("HtmlFormat constructor Error: query must be of type nettime.query.Query")
            # TypeError is still an Exception, so existing handlers keep working.
            raise TypeError("query must be of type nettime.query.Query")

        self.query = q

    def threads_ranking(self, rank=5):
        """Return an HTML table of the ``rank`` top-ranked threads.

        The rows come from ``self.query.threads_ranking(rank=rank)``; each
        row must provide 'from', 'nbr-references', 'subject' and 'url'.
        """
        data = self.query.threads_ranking(rank=rank)

        h = html.HTML()
        t = h.table()

        # header row
        r = t.tr
        r.td('date', klass='td_date_t')
        r.td('from', klass='td_from_t')
        r.td('replies', klass='td_rep_t')
        r.td('subject', klass='td_subject_t')

        for label, row in data.iterrows():
            r = t.tr
            # Use an explicit 'date' column when the frame has one; otherwise
            # fall back to the row label instead of failing with KeyError.
            # NOTE(review): assumes the frame is indexed by message date --
            # confirm against Query.threads_ranking.
            r.td(str(row.get('date', label)), klass='td_date')
            r.td(row['from'], klass='td_from')
            r.td(str(row['nbr-references']), klass='td_rep')
            # The subject cell holds a link; the anchor markup is inserted
            # verbatim (escape=False) so it is not escaped a second time.
            r.td('', klass='td_subject').text(str(h.a(row['subject'], href=row['url'])), escape=False)

        return str(t)

    def from_dataframe(self, data_frame, table_name=None, name_map=None):
        """Return an HTML table rendering ``data_frame``, index included.

        data_frame -- the frame to render; its index becomes the first column.
        table_name -- optional; used as the table ``id`` and, with a ``_t``
                      suffix, as its CSS class.
        name_map   -- optional mapping from column label to display name.

        Header cells get the CSS class ``td_<name>_t`` and body cells
        ``td_<name>``, where ``<name>`` is the (mapped) column name.
        """
        if name_map is None:
            name_map = {}

        # An unnamed index would otherwise put None in the header.
        index_name = data_frame.index.name
        if index_name is None:
            index_name = 'index'

        header = [index_name]
        header.extend(name_map.get(col, col) for col in data_frame.columns)

        # %-formatting keeps non-string labels (ints, dates) from raising
        # TypeError on concatenation.
        css_header = ['td_%s_t' % name for name in header]
        css_element = ['td_%s' % name for name in header]

        h = html.HTML()
        if table_name:
            t = h.table(id=table_name, klass=table_name + '_t')
        else:
            t = h.table()

        # header
        r = t.tr
        for name, css in zip(header, css_header):
            r.td(str(name), klass=css)

        # elements
        for label, row in data_frame.iterrows():
            r = t.tr
            r.td(str(label), klass=css_element[0])
            for value, css in zip(row, css_element[1:]):
                r.td(str(value), klass=css)

        return str(t)
|
||||||
|
|
||||||
|
class Tab:
    """Render query results as plain-text tables using ``tabulate``."""

    # Query object associated with this formatter; set by __init__.
    query = None

    def __init__(self, q=None):
        """Store the query object.

        q -- must be an instance of ``query.Query``.

        Raises TypeError (after logging an error) when ``q`` is of any
        other type, including the default ``None``.
        """
        if not isinstance(q, query.Query):
            logging.error("Tab constructor Error: query must be of type nettime.query.Query")
            # TypeError is still an Exception, so existing handlers keep working.
            raise TypeError("query must be of type nettime.query.Query")

        self.query = q

    def from_dataframe(self, data_frame, name_map=None):
        """Return ``data_frame`` formatted as a text table.

        data_frame -- the frame to render.
        name_map   -- optional mapping from column label to display name.

        The header row is the index name followed by the (mapped) column
        names.
        """
        # A fresh dict per call avoids the shared mutable default.
        if name_map is None:
            name_map = {}

        header = [data_frame.index.name]
        header.extend(name_map.get(col, col) for col in data_frame.columns)

        return tabulate(data_frame, headers=header)
|
||||||
|
|
||||||
|
|
||||||
70
nettime/plot.py
Normal file
70
nettime/plot.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import query
|
||||||
|
|
||||||
|
# for colormaps see:
|
||||||
|
# http://scipy.github.io/old-wiki/pages/Cookbook/Matplotlib/Show_colormaps
|
||||||
|
# http://pandas.pydata.org/pandas-docs/stable/visualization.html#colormaps
|
||||||
|
# http://matplotlib.org/examples/color/colormaps_reference.html
|
||||||
|
# for colors see:
|
||||||
|
# http://matplotlib.org/examples/color/named_colors.html
|
||||||
|
|
||||||
|
def bar_plot_series(series, title, color='blueviolet'):
    """Plot ``series`` as a bar chart and return the result of its ``plot`` call.

    series -- any object exposing a pandas-style ``plot(**kwargs)`` method.
    title  -- chart title.
    color  -- bar colour, passed through unchanged.

    The chart is always drawn stacked with an alpha of 0.8.
    """
    return series.plot(kind='bar', title=title, color=color, alpha=0.8, stacked=True)
|
||||||
|
|
||||||
|
class Plot:
    """Area plots comparing the top-ranked senders of a query over time."""

    # Query object the plots pull their data from; set by __init__.
    query = None

    def __init__(self, q=None):
        """Store the query object used by the plotting methods.

        q -- must be an instance of ``query.Query``.

        Raises TypeError (after logging an error) when ``q`` is of any
        other type, including the default ``None``.
        """
        if not isinstance(q, query.Query):
            # Imported here because this module has no file-level logging import.
            import logging
            logging.error("Plot constructor Error: query must be of type nettime.query.Query")
            # TypeError is still an Exception, so existing handlers keep working.
            raise TypeError("query must be of type nettime.query.Query")

        self.query = q

    def _area_plot(self, ranking, series_for, resolution, colormap, figsize):
        """Draw one unstacked area plot with a column per key of ``ranking``.

        ``series_for(key, resolution, series=True)`` is called for every key
        and the results are joined column-wise before plotting.
        """
        columns = [series_for(key, resolution, series=True) for key in ranking.keys()]
        df = pd.concat(columns, axis=1)
        return df.plot.area(colormap=colormap, figsize=figsize, stacked=False)

    # activity

    def activity_from_ranking(self, resolution='y', rank=5, colormap='spectral', figsize=(8, 7)):
        """Plot the activity over time of the ``rank`` top-ranked senders.

        resolution -- forwarded to ``Query.activity_from``.
        colormap, figsize -- forwarded to the pandas area plot.

        Returns whatever ``DataFrame.plot.area`` returns.
        """
        ranking = self.query.activity_from_ranking(rank=rank, series=True)
        # Honour the caller's colormap; it used to be ignored in favour of a
        # hard-coded 'spectral'.
        return self._area_plot(ranking, self.query.activity_from, resolution, colormap, figsize)

    # content length

    def content_length_from_ranking(self, resolution='y', rank=5, colormap='spectral', figsize=(8, 7)):
        """Plot the content length over time of the ``rank`` top-ranked senders.

        resolution -- forwarded to ``Query.content_length_from``.
        colormap, figsize -- forwarded to the pandas area plot.

        Returns whatever ``DataFrame.plot.area`` returns.
        """
        ranking = self.query.content_length_from_ranking(rank=rank, series=True)
        return self._area_plot(ranking, self.query.content_length_from, resolution, colormap, figsize)

    # threads

    def threads_from_ranking(self, resolution='y', rank=5, colormap='spectral', figsize=(8, 7)):
        """Plot the thread figures over time of the ``rank`` top-ranked senders.

        resolution -- forwarded to ``Query.threads_from``.
        colormap, figsize -- forwarded to the pandas area plot.

        Returns whatever ``DataFrame.plot.area`` returns.
        """
        ranking = self.query.threads_from_ranking(rank=rank, series=True)
        return self._area_plot(ranking, self.query.threads_from, resolution, colormap, figsize)
|
||||||
273
nettime/query.py
273
nettime/query.py
@ -1,7 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import archive
|
import archive
|
||||||
import logging, html
|
import logging
|
||||||
|
|
||||||
class Query:
|
class Query:
|
||||||
|
|
||||||
@ -30,65 +30,78 @@ class Query:
|
|||||||
|
|
||||||
return self.activity
|
return self.activity
|
||||||
|
|
||||||
def activity_from(self, email_address, resolution='y'):
|
def activity_from(self, email_address, resolution='y', series=False):
|
||||||
|
|
||||||
eaddr = email_address.replace('@', '{at}').lower()
|
eaddr = email_address.replace('@', '{at}').lower()
|
||||||
|
|
||||||
self._activity()
|
freq = 'M'
|
||||||
try:
|
if resolution.lower() == 'y':
|
||||||
if resolution.lower() == 'm':
|
freq = 'AS'
|
||||||
return self.activity[eaddr]
|
elif resolution.lower() == 'm':
|
||||||
elif resolution.lower() == 'y':
|
freq = 'M'
|
||||||
y = self.activity[eaddr].resample('AS').sum()
|
|
||||||
y.index = y.index.year
|
|
||||||
return y
|
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
self._activity()
|
||||||
|
try:
|
||||||
|
af = self.activity[eaddr]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def activity_overall(self, resolution='y'):
|
activity_from = af.groupby([pd.TimeGrouper(freq=freq)]).sum()
|
||||||
|
|
||||||
self._activity()
|
if freq == 'AS':
|
||||||
try:
|
activity_from.index = activity_from.index.format(formatter=lambda x: x.strftime('%Y'))
|
||||||
sum_activity_month = self.activity.sum(axis=1)
|
activity_from.index.name = 'year'
|
||||||
if resolution.lower() == 'm':
|
|
||||||
sum_activity_month.rename
|
|
||||||
return sum_activity_month
|
|
||||||
elif resolution.lower() == 'y':
|
|
||||||
y = sum_activity_month.resample('AS').sum()
|
|
||||||
y.index = y.index.year
|
|
||||||
return y
|
|
||||||
else:
|
else:
|
||||||
return None
|
activity_from.index = activity_from.index.format(formatter=lambda x: x.strftime('%Y-%m'))
|
||||||
except:
|
activity_from.index.name = 'year-month'
|
||||||
return None
|
|
||||||
|
if series:
|
||||||
|
return activity_from
|
||||||
|
|
||||||
|
return activity_from.to_frame('nbr-messages').astype(int)
|
||||||
|
|
||||||
|
def activity_from_ranking(self, rank=5, filter_nettime=True, series=False):
|
||||||
|
|
||||||
def activity_from_ranking(self, resolution='y', rank=5, filter_nettime=True):
|
|
||||||
# finish this -- re resolution AND filtering
|
|
||||||
self._activity()
|
self._activity()
|
||||||
afr = self.activity.sum(axis=0).order(ascending=False)
|
afr = self.activity.sum(axis=0).order(ascending=False)
|
||||||
if filter_nettime:
|
if filter_nettime:
|
||||||
p = r'^((?!nettime*).)*$'
|
p = r'^((?!nettime*).)*$'
|
||||||
afr = afr[afr.index.str.contains(p)]
|
afr = afr[afr.index.str.contains(p)]
|
||||||
|
|
||||||
|
if series:
|
||||||
return afr[:rank]
|
return afr[:rank]
|
||||||
|
|
||||||
def plot_activity_from_ranking(self, resolution='y', rank=5, figsize=(8, 7)):
|
return afr[:rank].to_frame('nbr-messages').astype(int)
|
||||||
|
|
||||||
activity_rank = self.activity_from_ranking(rank=rank).keys()
|
|
||||||
series = []
|
|
||||||
for k in activity_rank:
|
|
||||||
series.append(self.activity_from(k, resolution))
|
|
||||||
|
|
||||||
df = pd.concat(series, axis=1)
|
def activity_overall(self, resolution='y', series=False):
|
||||||
|
|
||||||
colors = np.random.rand(len(df),3)
|
freq = 'M'
|
||||||
|
if resolution.lower() == 'y':
|
||||||
if figsize:
|
freq = 'AS'
|
||||||
df.plot(colors=colors, figsize=figsize)
|
elif resolution.lower() == 'm':
|
||||||
|
freq = 'M'
|
||||||
else:
|
else:
|
||||||
df.plot(colors=colors)
|
return None
|
||||||
|
|
||||||
|
self._activity()
|
||||||
|
|
||||||
|
y = self.activity.sum(axis=1)
|
||||||
|
y = y.groupby([pd.TimeGrouper(freq=freq)]).sum()
|
||||||
|
|
||||||
|
if freq == 'AS':
|
||||||
|
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
|
||||||
|
y.index.name = 'year'
|
||||||
|
else:
|
||||||
|
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
|
||||||
|
y.index.name = 'year-month'
|
||||||
|
|
||||||
|
if series:
|
||||||
|
return y
|
||||||
|
|
||||||
|
return y.to_frame('nbr-messages').astype(int)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
content lenght
|
content lenght
|
||||||
@ -103,63 +116,78 @@ class Query:
|
|||||||
|
|
||||||
return self.content_length
|
return self.content_length
|
||||||
|
|
||||||
def content_length_from(self, email_address, resolution='y'):
|
def content_length_from(self, email_address, resolution='y', series=False):
|
||||||
|
|
||||||
eaddr = email_address.replace('@', '{at}').lower()
|
eaddr = email_address.replace('@', '{at}').lower()
|
||||||
|
|
||||||
self._content_length()
|
freq = 'M'
|
||||||
try:
|
if resolution.lower() == 'y':
|
||||||
if resolution.lower() == 'm':
|
freq = 'AS'
|
||||||
return self.content_length[eaddr]
|
elif resolution.lower() == 'm':
|
||||||
elif resolution.lower() == 'y':
|
freq = 'M'
|
||||||
y = self.content_length[eaddr].resample('AS').sum()
|
|
||||||
y.index = y.index.year
|
|
||||||
return y
|
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
self._content_length()
|
||||||
|
try:
|
||||||
|
af = self.content_length[eaddr]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def content_length_overall(self):
|
content_length_from = af.groupby([pd.TimeGrouper(freq=freq)]).sum()
|
||||||
|
|
||||||
self._content_length()
|
if freq == 'AS':
|
||||||
try:
|
content_length_from.index = content_length_from.index.format(formatter=lambda x: x.strftime('%Y'))
|
||||||
sum_content_length_month = self.content_length.sum(axis=1)
|
content_length_from.index.name = 'year'
|
||||||
if resolution.lower() == 'm':
|
|
||||||
return sum_content_length_month
|
|
||||||
elif resolution.lower() == 'y':
|
|
||||||
y = sum_content_length_month.resample('AS').sum()
|
|
||||||
y.index = y.index.year
|
|
||||||
return y
|
|
||||||
else:
|
else:
|
||||||
return None
|
content_length_from.index = content_length_from.index.format(formatter=lambda x: x.strftime('%Y-%m'))
|
||||||
except:
|
content_length_from.index.name = 'year-month'
|
||||||
return None
|
|
||||||
|
if series:
|
||||||
|
return content_length_from
|
||||||
|
|
||||||
|
return content_length_from.to_frame('nbr-bytes').astype(int)
|
||||||
|
|
||||||
|
def content_length_from_ranking(self, resolution='y', rank=5, filter_nettime=True, series=False):
|
||||||
|
|
||||||
def content_length_from_ranking(self, resolution='y', rank=5, filter_nettime=True):
|
|
||||||
# finish this -- re resolution
|
|
||||||
self._content_length()
|
self._content_length()
|
||||||
cfr = self.content_length.sum(axis=0).order(ascending=False)
|
cfr = self.content_length.sum(axis=0).order(ascending=False)
|
||||||
if filter_nettime:
|
if filter_nettime:
|
||||||
p = r'^((?!nettime*).)*$'
|
p = r'^((?!nettime*).)*$'
|
||||||
cfr = cfr[cfr.index.str.contains(p)]
|
cfr = cfr[cfr.index.str.contains(p)]
|
||||||
|
|
||||||
|
if series:
|
||||||
return cfr[:rank]
|
return cfr[:rank]
|
||||||
|
|
||||||
def plot_content_length_from_ranking(self, resolution='y', rank=5, figsize=(8, 7)):
|
return cfr[:rank].to_frame('nbr-bytes').astype(int)
|
||||||
|
|
||||||
content_rank = self.content_length_from_ranking(rank=rank).keys()
|
def content_length_overall(self, resolution='y', series=False):
|
||||||
series = []
|
|
||||||
for k in content_rank:
|
|
||||||
series.append(self.content_length_from(k, resolution))
|
|
||||||
|
|
||||||
df = pd.concat(series, axis=1)
|
freq = 'M'
|
||||||
|
if resolution.lower() == 'y':
|
||||||
colors = np.random.rand(len(df),3)
|
freq = 'AS'
|
||||||
|
elif resolution.lower() == 'm':
|
||||||
if figsize:
|
freq = 'M'
|
||||||
df.plot(colors=colors, figsize=figsize)
|
|
||||||
else:
|
else:
|
||||||
df.plot(colors=colors)
|
return None
|
||||||
|
|
||||||
|
self._content_length()
|
||||||
|
|
||||||
|
y = self.content_length.sum(axis=1)
|
||||||
|
y = y.groupby([pd.TimeGrouper(freq=freq)]).sum()
|
||||||
|
|
||||||
|
if freq == 'AS':
|
||||||
|
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
|
||||||
|
y.index.name = 'year'
|
||||||
|
else:
|
||||||
|
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
|
||||||
|
y.index.name = 'year-month'
|
||||||
|
|
||||||
|
if series:
|
||||||
|
return y
|
||||||
|
|
||||||
|
return y.to_frame('nbr-bytes').astype(int)
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
threads
|
threads
|
||||||
@ -171,37 +199,39 @@ class Query:
|
|||||||
self.threads = self.netarchive.dataframe[self.netarchive.dataframe['nbr-references'] > thresh].reindex(columns=['from','nbr-references','subject', 'url', 'message-id']).sort_values('nbr-references', ascending=False)
|
self.threads = self.netarchive.dataframe[self.netarchive.dataframe['nbr-references'] > thresh].reindex(columns=['from','nbr-references','subject', 'url', 'message-id']).sort_values('nbr-references', ascending=False)
|
||||||
return self.threads;
|
return self.threads;
|
||||||
|
|
||||||
def threads_ranking(self, rank=5, output=None):
|
def threads_ranking(self, rank=5, resolution=None):
|
||||||
|
|
||||||
self._threads()
|
self._threads()
|
||||||
|
|
||||||
|
if resolution == None:
|
||||||
data = self.threads.drop('message-id', axis=1)[:rank]
|
data = self.threads.drop('message-id', axis=1)[:rank]
|
||||||
data['date'] = data.index
|
return data.reindex_axis(['subject', 'from', 'nbr-references', 'url'], axis=1)
|
||||||
if output is None:
|
|
||||||
return data
|
|
||||||
elif output == 'string':
|
|
||||||
return data.to_string()
|
|
||||||
elif output == 'html':
|
|
||||||
h = html.HTML()
|
|
||||||
t = h.table()
|
|
||||||
|
|
||||||
r = t.tr
|
freq = 'M'
|
||||||
r.td('date', klass='td_date_t')
|
if resolution.lower() == 'y':
|
||||||
r.td('from', klass='td_from_t')
|
freq = 'AS'
|
||||||
r.td('replies', klass='td_rep_t')
|
elif resolution.lower() == 'm':
|
||||||
r.td('subject', klass='td_subject_t')
|
freq = 'M'
|
||||||
|
|
||||||
for i, row in data.iterrows():
|
|
||||||
r = t.tr
|
|
||||||
r.td(str(row['date']), klass='td_date')
|
|
||||||
r.td(row['from'], klass='td_from')
|
|
||||||
r.td(str(row['nbr-references']), klass='td_rep')
|
|
||||||
r.td('', klass='td_subject').text(str(h.a(row['subject'], href=row['url'])), escape=False)
|
|
||||||
|
|
||||||
return str(t)
|
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def threads_from(self, email_address, resolution='y'):
|
# get the threads ranking per time resolution
|
||||||
|
#
|
||||||
|
data = self.threads.drop('message-id', axis=1)
|
||||||
|
data = data.groupby([pd.TimeGrouper(freq=freq)])
|
||||||
|
r = {}
|
||||||
|
for k, v in data:
|
||||||
|
if freq == 'AS':
|
||||||
|
time_key = k.strftime('%Y')
|
||||||
|
else:
|
||||||
|
time_key = k.strftime('%Y-%m')
|
||||||
|
frame = v[:rank]
|
||||||
|
frame = frame.reindex_axis(['subject', 'from', 'nbr-references', 'url'], axis=1)
|
||||||
|
r[time_key] = frame
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
def threads_from(self, email_address, resolution='y', series=False):
|
||||||
|
|
||||||
freq = 'M'
|
freq = 'M'
|
||||||
if resolution.lower() == 'y':
|
if resolution.lower() == 'y':
|
||||||
@ -219,9 +249,22 @@ class Query:
|
|||||||
threads_from = self.threads.reindex(columns=['from', 'nbr-references'])
|
threads_from = self.threads.reindex(columns=['from', 'nbr-references'])
|
||||||
threads_from_ranking = threads_from.groupby([pd.TimeGrouper(freq=freq), 'from']).sum()
|
threads_from_ranking = threads_from.groupby([pd.TimeGrouper(freq=freq), 'from']).sum()
|
||||||
threads_from_ranking = threads_from_ranking.reset_index().pivot(columns='from', index='date', values='nbr-references').fillna(0)
|
threads_from_ranking = threads_from_ranking.reset_index().pivot(columns='from', index='date', values='nbr-references').fillna(0)
|
||||||
|
|
||||||
|
if series:
|
||||||
return threads_from_ranking[eaddr]
|
return threads_from_ranking[eaddr]
|
||||||
|
|
||||||
def threads_from_ranking(self, rank=5, filter_nettime=True):
|
threads_from_ranking = threads_from_ranking[eaddr].to_frame('nbr-threads').astype(int)
|
||||||
|
|
||||||
|
if freq == 'AS':
|
||||||
|
threads_from_ranking.index = threads_from_ranking.index.format(formatter=lambda x: x.strftime('%Y'))
|
||||||
|
threads_from_ranking.index.name = 'year'
|
||||||
|
else:
|
||||||
|
threads_from_ranking.index = threads_from_ranking.index.format(formatter=lambda x: x.strftime('%Y-%m'))
|
||||||
|
threads_from_ranking.index.name = 'year-month'
|
||||||
|
|
||||||
|
return threads_from_ranking
|
||||||
|
|
||||||
|
def threads_from_ranking(self, rank=5, filter_nettime=True, series=False):
|
||||||
|
|
||||||
self._threads()
|
self._threads()
|
||||||
threads_from = self.threads.reindex(columns=['from', 'nbr-references'])
|
threads_from = self.threads.reindex(columns=['from', 'nbr-references'])
|
||||||
@ -233,24 +276,11 @@ class Query:
|
|||||||
p = r'^((?!nettime*).)*$'
|
p = r'^((?!nettime*).)*$'
|
||||||
tfr = tfr[tfr.index.str.contains(p)]
|
tfr = tfr[tfr.index.str.contains(p)]
|
||||||
|
|
||||||
|
if series:
|
||||||
return tfr[:rank]
|
return tfr[:rank]
|
||||||
|
|
||||||
def plot_threads_from_ranking(self, resolution='y', rank=5, figsize=(8, 7)):
|
tfr = tfr[:rank].to_frame('nbr-threads').astype(int)
|
||||||
|
return tfr
|
||||||
threads_rank = self.threads_from_ranking(rank=rank).keys()
|
|
||||||
series = []
|
|
||||||
for k in threads_rank:
|
|
||||||
series.append(self.threads_from(k, resolution))
|
|
||||||
|
|
||||||
df = pd.concat(series, axis=1)
|
|
||||||
|
|
||||||
colors = np.random.rand(len(df),3)
|
|
||||||
|
|
||||||
if figsize:
|
|
||||||
df.plot(colors=colors, figsize=figsize)
|
|
||||||
else:
|
|
||||||
df.plot(colors=colors)
|
|
||||||
|
|
||||||
|
|
||||||
def threads_overall(self, resolution='y', aggregate='sum', tresh=0):
|
def threads_overall(self, resolution='y', aggregate='sum', tresh=0):
|
||||||
|
|
||||||
@ -263,7 +293,7 @@ class Query:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
agg = aggregate.lower()
|
agg = aggregate.lower()
|
||||||
if not agg in ['sum', 'mean']:
|
if not agg in ['sum', 'mean', 'count']:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not self.threads is None:
|
if not self.threads is None:
|
||||||
@ -273,11 +303,20 @@ class Query:
|
|||||||
self._threads(tresh)
|
self._threads(tresh)
|
||||||
|
|
||||||
if agg == 'sum':
|
if agg == 'sum':
|
||||||
|
# number of replies total (re: sum all the replies)
|
||||||
y = self.threads.groupby([pd.TimeGrouper(freq=freq)]).sum()
|
y = self.threads.groupby([pd.TimeGrouper(freq=freq)]).sum()
|
||||||
else:
|
elif agg == 'mean':
|
||||||
y = self.threads.groupby([pd.TimeGrouper(freq=freq)]).mean()
|
y = self.threads.groupby([pd.TimeGrouper(freq=freq)]).mean()
|
||||||
|
else:
|
||||||
|
# number of threads (re: msgs with at least one reply)
|
||||||
|
y = self.threads['nbr-references'].groupby([pd.TimeGrouper(freq=freq)]).count()
|
||||||
|
y = y.to_frame('nbr-threads')
|
||||||
|
|
||||||
if freq == 'AS':
|
if freq == 'AS':
|
||||||
y.index = y.index.year
|
y.index = y.index.format(formatter=lambda x: x.strftime('%Y'))
|
||||||
|
y.index.name = 'year'
|
||||||
|
else:
|
||||||
|
y.index = y.index.format(formatter=lambda x: x.strftime('%Y-%m'))
|
||||||
|
y.index.name = 'year-month'
|
||||||
|
|
||||||
return y
|
return y
|
||||||
|
|||||||
90
nettime_report.py
Normal file
90
nettime_report.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
import nettime.query
|
||||||
|
import nettime.format
|
||||||
|
import nettime.plot
|
||||||
|
|
||||||
|
class Report:
    """Basic per-period statistics on messages and threads, plus bar plots."""

    # Query object the statistics are computed from; set by __init__.
    query = None
    # Cached statistics frame, filled by the first matrix_msgs_threads() call.
    matrix = None

    def __init__(self, q=None):
        """Store the query object used to compute the statistics.

        q -- must be an instance of ``nettime.query.Query``.

        Raises TypeError (after logging an error) when ``q`` is of any
        other type, including the default ``None``.
        """
        if not isinstance(q, nettime.query.Query):
            # Imported here because this module has no file-level logging import.
            import logging
            logging.error("Report constructor Error: query must be of type nettime.query.Query")
            # TypeError is still an Exception, so existing handlers keep working.
            raise TypeError("query must be of type nettime.query.Query")

        self.query = q

    # (basic) stats

    def matrix_msgs_threads(self):
        """Return the statistics frame, computing it on first use.

        The frame starts from ``query.activity_overall()`` (column
        'nbr-messages') and gains these columns:

        nbr-threads         -- ``threads_overall(aggregate='count')``
        nbr-replies         -- ``threads_overall(aggregate='sum')``
        nbr-single-messages -- messages minus replies minus threads
        avg-rep-per-msg     -- replies / messages
        avg-rep-per-thrd    -- replies / threads

        The result is cached on the instance, so later calls return the
        same object.
        """
        if self.matrix is None:
            # nbr messages
            mat = self.query.activity_overall()

            # nbr threads
            mat['nbr-threads'] = self.query.threads_overall(aggregate='count')['nbr-threads']

            # nbr replies
            mat['nbr-replies'] = self.query.threads_overall(aggregate='sum')['nbr-references']

            # nbr non-replies (aka. non-threads)
            mat['nbr-single-messages'] = mat['nbr-messages'] - mat['nbr-replies'] - mat['nbr-threads']

            # avg. rep per message
            mat['avg-rep-per-msg'] = mat['nbr-replies'] / mat['nbr-messages']

            # avg. rep per thread
            mat['avg-rep-per-thrd'] = mat['nbr-replies'] / mat['nbr-threads']

            self.matrix = mat

        return self.matrix

    # plots

    def _plot_column(self, column, label, title, color):
        """Bar-plot one statistics column, relabelled as ``label``."""
        frame = self.matrix_msgs_threads()[column].to_frame(label)
        return nettime.plot.bar_plot_series(frame, title=title, color=color)

    def plot_nbr_msgs(self, title='Nbr. Messages', label='messages', color='mediumblue'):
        """Bar-plot the number of messages; returns the plot object."""
        return self._plot_column('nbr-messages', label, title, color)

    def plot_nbr_threads(self, title='Nbr. Threads', label='threads', color='crimson'):
        """Bar-plot the number of threads; returns the plot object."""
        return self._plot_column('nbr-threads', label, title, color)

    def plot_nbr_replies(self, title='Nbr. Replies in Threads', label='replies', color='dimgray'):
        """Bar-plot the number of replies in threads; returns the plot object."""
        return self._plot_column('nbr-replies', label, title, color)

    def plot_avg_rep_p_msg(self, title='Avg. Replies per Messages', label='replies-per-messasges', color='limegreen'):
        """Bar-plot the average replies per message; returns the plot object."""
        return self._plot_column('avg-rep-per-msg', label, title, color)

    def plot_avg_rep_p_thrd(self, title='Avg. Replies per Thread', label='replies-per-thread', color='blueviolet'):
        """Bar-plot the average replies per thread; returns the plot object."""
        return self._plot_column('avg-rep-per-thrd', label, title, color)

    def plot_msgs_replies(self, title='Nbr. Messages segments (individual messages vs thread replies)'):
        """Bar-plot single messages, threads and replies together; returns the plot object."""
        segments = self.matrix_msgs_threads()[['nbr-single-messages', 'nbr-threads', 'nbr-replies']]
        return nettime.plot.bar_plot_series(segments, color=['mediumblue', 'red', 'dimgray'], title=title)
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user