hits
This commit is contained in:
+14
-1
@@ -5,6 +5,8 @@ import os, datetime, json, gzip, re
|
||||
import analysis.util
|
||||
import analysis.query
|
||||
|
||||
import search.archive ## circular...
|
||||
|
||||
|
||||
def filter_date(msg, archive_name):
|
||||
|
||||
@@ -130,6 +132,14 @@ def load_from_file(filename, archive_name, archive_dir, json_data=None):
|
||||
|
||||
print('---> ' + archive_name)
|
||||
return json_data_to_pd_dataframe(threads, archive_name)
|
||||
|
||||
def load_from_search_archive(archive):
    """Build the analysis dataframe from an already-loaded search archive.

    Every value stored in ``archive.archive`` (the mapping's keys are not
    used) is collected into a list, which is handed to
    ``json_data_to_pd_dataframe`` together with ``archive.archive_name``.

    Args:
        archive: object exposing an ``archive`` mapping and an
            ``archive_name`` attribute (presumably a
            ``search.archive.Archive`` -- confirm against callers).

    Returns:
        Whatever ``json_data_to_pd_dataframe`` returns for the collected
        threads.
    """
    # Only the values are needed, so iterate the values view directly
    # instead of unpacking (key, value) pairs and discarding the key.
    threads = list(archive.archive.values())
    return json_data_to_pd_dataframe(threads, archive.archive_name)
|
||||
|
||||
|
||||
|
||||
|
||||
class Archive:
|
||||
@@ -140,7 +150,10 @@ class Archive:
|
||||
def __init__(self, archive_name, archive_dir="archives"):
|
||||
|
||||
if isinstance(archive_name, pd.core.frame.DataFrame):
|
||||
self.dataframe = archive_name.copy()
|
||||
self.dataframe = archive_name ## no copies here
|
||||
|
||||
if isinstance(archive_name, search.archive.Archive):
|
||||
self.dataframe = load_from_search_archive(archive_name)
|
||||
|
||||
if isinstance(archive_name, str):
|
||||
# need a filename or a dir name....
|
||||
|
||||
@@ -31,6 +31,13 @@ def table_threads_ranking(ranking_dataframe):
|
||||
|
||||
return html_str
|
||||
|
||||
def frame_to_dictionary_threads_ranking(ranking_dataframe):
    """Convert a threads-ranking dataframe into a list of plain dicts.

    One dictionary is produced per row, in row order, with the keys
    ``'date'``, ``'subject'``, ``'url'``, ``'from'`` and
    ``'nbr-references'``.

    Args:
        ranking_dataframe: dataframe providing ``iterrows()`` and the
            columns ``subject``, ``url``, ``from`` and ``nbr-references``.
            The index label of each row is used as its date.

    Returns:
        list[dict]: one entry per row; an empty list for an empty frame.

    Raises:
        KeyError: if one of the expected columns is missing.
    """
    return [
        {
            # The row's index label is stringified to serve as the date.
            'date': str(index),
            'subject': row['subject'],
            'url': row['url'],
            'from': row['from'],
            'nbr-references': row['nbr-references'],
        }
        for index, row in ranking_dataframe.iterrows()
    ]
|
||||
|
||||
|
||||
|
||||
|
||||
+13
-2
@@ -11,9 +11,15 @@ def format_author(msg, archive_name):
|
||||
return msg['author_name']
|
||||
|
||||
def format_from_token(from_str, sep):
|
||||
|
||||
fff = from_str
|
||||
|
||||
from_addr = email.utils.parseaddr(from_str)[1]
|
||||
|
||||
fffa = email.utils.parseaddr(from_str)
|
||||
|
||||
if sep not in from_addr:
|
||||
tok = from_str.split()
|
||||
tok = from_str.split()
|
||||
try:
|
||||
at = tok.index(sep)
|
||||
from_addr = ''.join([tok[at-1], '{AT}', tok[at+1]])
|
||||
@@ -22,13 +28,18 @@ def format_from_token(from_str, sep):
|
||||
except ValueError:
|
||||
print(tok)
|
||||
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
|
||||
print("*** " + fff)
|
||||
print("+++")
|
||||
print(fffa)
|
||||
print("----")
|
||||
|
||||
return None
|
||||
else:
|
||||
from_addr = from_addr.replace(sep, '{AT}')
|
||||
return from_addr.lower()
|
||||
|
||||
def format_from(msg, archive_name):
|
||||
from_str = msg['from']
|
||||
from_str = msg['from']
|
||||
|
||||
if " {AT} " in from_str:
|
||||
return format_from_token(from_str, '{AT}')
|
||||
|
||||
Reference in New Issue
Block a user