threads
This commit is contained in:
parent
5e6c3bb095
commit
63181b37f3
@ -193,6 +193,7 @@ class Archive:
|
|||||||
try:
|
try:
|
||||||
sum_activity_month = self.activity.sum(axis=1)
|
sum_activity_month = self.activity.sum(axis=1)
|
||||||
if resolution.lower() == 'm':
|
if resolution.lower() == 'm':
|
||||||
|
sum_activity_month.rename
|
||||||
return sum_activity_month
|
return sum_activity_month
|
||||||
elif resolution.lower() == 'y':
|
elif resolution.lower() == 'y':
|
||||||
y = sum_activity_month.resample('AS').sum()
|
y = sum_activity_month.resample('AS').sum()
|
||||||
@ -303,10 +304,10 @@ class Archive:
|
|||||||
threads
|
threads
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def _threads(self, thresh=0):
    '''
    Build (and cache on self.threads) the table of messages that have
    more than `thresh` references.

    The table is taken from self.dataframe, restricted to the columns
    'from', 'nbr-references', 'subject', 'url' and 'message-id', and
    sorted by 'nbr-references' in descending order.

    thresh: only rows whose 'nbr-references' is strictly greater than
            this value are kept (default 0).

    Returns the cached table (also available as self.threads).
    '''
    # The cache used to be keyed on nothing: once filled, a call with a
    # different threshold silently returned the stale table. Remember
    # which threshold the cached table was built with and rebuild when
    # it differs.
    cached_thresh = getattr(self, '_threads_thresh', None)

    if self.threads is None or cached_thresh != thresh:
        referenced = self.dataframe[self.dataframe['nbr-references'] > thresh]
        self.threads = referenced.reindex(
            columns=['from', 'nbr-references', 'subject', 'url', 'message-id']
        ).sort_values('nbr-references', ascending=False)
        self._threads_thresh = thresh

    return self.threads
|
||||||
|
|
||||||
def threads_ranking(self, rank=5):
|
def threads_ranking(self, rank=5):
|
||||||
@ -314,4 +315,89 @@ class Archive:
|
|||||||
self._threads()
|
self._threads()
|
||||||
return self.threads.drop('message-id', axis=1)[:rank]
|
return self.threads.drop('message-id', axis=1)[:rank]
|
||||||
|
|
||||||
|
def threads_from(self, email_address, resolution='y'):
    '''
    Number of thread references attributed to one sender, per period.

    email_address: sender address; '@' is replaced by '{at}' and the
                   result is lower-cased before it is looked up in the
                   'from' column.
    resolution:    'y' for yearly buckets (year start) or 'm' for
                   monthly buckets (case-insensitive).

    Returns a Series indexed by period with the summed 'nbr-references'
    of that sender (0 for periods in which other senders appear but
    this one does not), or None when `resolution` is not recognised.
    Raises KeyError when the sender does not appear in self.threads.
    '''
    # 'YS' is the year-start alias ('AS' is its older spelling).
    # NOTE(review): recent pandas spells month-end 'ME' instead of 'M';
    # 'M' is kept here so older pandas keeps working.
    freq = {'y': 'YS', 'm': 'M'}.get(resolution.lower())
    if freq is None:
        return None

    eaddr = email_address.replace('@', '{at}').lower()

    self._threads()
    per_sender = self.threads.reindex(columns=['from', 'nbr-references'])

    # pd.Grouper replaces the removed pd.TimeGrouper; with only `freq`
    # given it buckets on the frame's (datetime) index.
    summed = per_sender.groupby([pd.Grouper(freq=freq), 'from']).sum()

    # One column per sender, one row per period. The pivot relies on the
    # index of self.threads being named 'date'.
    table = summed.reset_index().pivot(
        columns='from', index='date', values='nbr-references'
    ).fillna(0)

    return table[eaddr]
|
||||||
|
|
||||||
|
def threads_from_ranking(self, rank=5, filter_nettime=True):
    '''
    Rank senders by the total 'nbr-references' of their messages in
    self.threads.

    rank:           number of top senders to return.
    filter_nettime: when true, drop every sender whose address matches
                    'nettim' followed by any number of 'e' (i.e. the
                    pattern below keeps only addresses without it).

    Returns a Series indexed by sender ('from'), sorted by descending
    total and truncated to the first `rank` entries.
    '''
    self._threads()
    per_sender = self.threads.reindex(columns=['from', 'nbr-references'])

    # pd.Grouper replaces the removed pd.TimeGrouper; 'YS' is the
    # year-start alias ('AS' is its older spelling).
    yearly = per_sender.groupby([pd.Grouper(freq='YS'), 'from']).sum()
    table = yearly.reset_index().pivot(
        columns='from', index='date', values='nbr-references'
    ).fillna(0)

    # Series.order() no longer exists; sort_values is its replacement
    # and is what _threads already uses.
    tfr = table.sum(axis=0).sort_values(ascending=False)

    if filter_nettime:
        p = r'^((?!nettime*).)*$'
        tfr = tfr[tfr.index.str.contains(p)]

    # Positional slice: first `rank` senders.
    return tfr.iloc[:rank]
|
||||||
|
|
||||||
|
def plot_threads_from_ranking(self, resolution='y', rank=5, figsize=(8, 7)):
    '''
    Plot, one line per sender, the per-period thread references of the
    `rank` top senders returned by threads_from_ranking().

    resolution: passed through to threads_from() ('y' or 'm').
    rank:       number of top senders to plot.
    figsize:    figure size handed to DataFrame.plot; a falsy value
                lets the plotting backend pick its default.

    Returns None. Nothing is plotted when the ranking is empty.
    '''
    senders = self.threads_from_ranking(rank=rank).keys()
    series = [self.threads_from(k, resolution) for k in senders]

    # pd.concat raises on an empty list; there is nothing to draw then.
    if not series:
        return

    df = pd.concat(series, axis=1)

    # One random RGB triple per plotted column (the palette used to be
    # sized by the number of rows).
    colors = np.random.rand(len(df.columns), 3).tolist()

    # The keyword is `color`; `colors` is not accepted by DataFrame.plot.
    df.plot(color=colors, figsize=figsize if figsize else None)
|
||||||
|
|
||||||
|
|
||||||
|
def threads_overall(self, resolution='y', aggregate='sum', tresh=0):
    '''
    Aggregate 'nbr-references' of all threads per period.

    resolution: 'y' for yearly buckets or 'm' for monthly buckets
                (case-insensitive).
    aggregate:  'sum' or 'mean' (case-insensitive).
    tresh:      threshold forwarded to _threads(); only messages with
                more references than this are taken into account.

    Returns a DataFrame with one 'nbr-references' column and one row
    per period (indexed by the integer year for yearly resolution), or
    None when `resolution` or `aggregate` is not recognised.

    Side effect: self.threads is rebuilt with `tresh`.
    '''
    # 'YS' is the year-start alias ('AS' is its older spelling).
    # NOTE(review): recent pandas spells month-end 'ME' instead of 'M';
    # 'M' is kept here so older pandas keeps working.
    freq = {'y': 'YS', 'm': 'M'}.get(resolution.lower())
    if freq is None:
        return None

    agg = aggregate.lower()
    if agg not in ('sum', 'mean'):
        return None

    # Drop any cached table so _threads() rebuilds it with `tresh`.
    self.threads = None
    self._threads(tresh)

    # Aggregate the numeric column only: the other columns hold text,
    # which cannot be averaged and must not be concatenated by a sum.
    # pd.Grouper replaces the removed pd.TimeGrouper.
    grouped = self.threads[['nbr-references']].groupby(pd.Grouper(freq=freq))
    y = grouped.sum() if agg == 'sum' else grouped.mean()

    if freq == 'YS':
        # Yearly buckets are labelled by the plain year number.
        y.index = y.index.year

    return y
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user