This commit is contained in:
gauthiier
2017-11-24 09:52:14 +01:00
parent 608467fdd8
commit 464194c9b9
7 changed files with 72 additions and 9 deletions
+14 -1
View File
@@ -5,6 +5,8 @@ import os, datetime, json, gzip, re
import analysis.util
import analysis.query
import search.archive ## circular...
def filter_date(msg, archive_name):
@@ -130,6 +132,14 @@ def load_from_file(filename, archive_name, archive_dir, json_data=None):
print('---> ' + archive_name)
return json_data_to_pd_dataframe(threads, archive_name)
def load_from_search_archive(archive):
    """Build a dataframe from the entries held by a search archive object.

    Every value of the ``archive.archive`` mapping is collected into a list
    (the keys are ignored), and that list is handed to
    ``json_data_to_pd_dataframe`` together with ``archive.archive_name``.
    The helper's result is returned unchanged.
    """
    # Only the mapped values are needed; iteration order of the mapping is kept.
    collected = [entry for _, entry in archive.archive.items()]
    return json_data_to_pd_dataframe(collected, archive.archive_name)
class Archive:
@@ -140,7 +150,10 @@ class Archive:
def __init__(self, archive_name, archive_dir="archives"):
if isinstance(archive_name, pd.core.frame.DataFrame):
self.dataframe = archive_name.copy()
self.dataframe = archive_name ## no copies here
if isinstance(archive_name, search.archive.Archive):
self.dataframe = load_from_search_archive(archive_name)
if isinstance(archive_name, str):
# need a filename or a dir name....
+7
View File
@@ -31,6 +31,13 @@ def table_threads_ranking(ranking_dataframe):
return html_str
def frame_to_dictionary_threads_ranking(ranking_dataframe):
    """Convert a threads-ranking dataframe into a list of dictionaries.

    One dictionary is produced per row, in row order. The row's index label
    is stringified and stored under ``'date'``; the ``'subject'``, ``'url'``,
    ``'from'`` and ``'nbr-references'`` columns are copied over under the
    same keys. An empty dataframe yields an empty list.
    """
    copied_columns = ('subject', 'url', 'from', 'nbr-references')
    return [
        {'date': str(label), **{name: record[name] for name in copied_columns}}
        for label, record in ranking_dataframe.iterrows()
    ]
+13 -2
View File
@@ -11,9 +11,15 @@ def format_author(msg, archive_name):
return msg['author_name']
def format_from_token(from_str, sep):
fff = from_str
from_addr = email.utils.parseaddr(from_str)[1]
fffa = email.utils.parseaddr(from_str)
if sep not in from_addr:
tok = from_str.split()
tok = from_str.split()
try:
at = tok.index(sep)
from_addr = ''.join([tok[at-1], '{AT}', tok[at+1]])
@@ -22,13 +28,18 @@ def format_from_token(from_str, sep):
except ValueError:
print(tok)
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
print("*** " + fff)
print("+++")
print(fffa)
print("----")
return None
else:
from_addr = from_addr.replace(sep, '{AT}')
return from_addr.lower()
def format_from(msg, archive_name):
from_str = msg['from']
from_str = msg['from']
if " {AT} " in from_str:
return format_from_token(from_str, '{AT}')