gauthiier 2017-11-24 09:52:14 +01:00
parent 608467fdd8
commit 464194c9b9
7 changed files with 72 additions and 9 deletions

View File

@@ -5,6 +5,8 @@ import os, datetime, json, gzip, re
 import analysis.util
 import analysis.query
+import search.archive ## circular...

 def filter_date(msg, archive_name):

@@ -130,6 +132,14 @@ def load_from_file(filename, archive_name, archive_dir, json_data=None):
     print('---> ' + archive_name)
     return json_data_to_pd_dataframe(threads, archive_name)

+def load_from_search_archive(archive):
+    threads = []
+    for k, v in archive.archive.items():
+        threads.append(v)
+    return json_data_to_pd_dataframe(threads, archive.archive_name)

 class Archive:

@@ -140,7 +150,10 @@ class Archive:
     def __init__(self, archive_name, archive_dir="archives"):
         if isinstance(archive_name, pd.core.frame.DataFrame):
-            self.dataframe = archive_name.copy()
+            self.dataframe = archive_name ## no copies here
+        if isinstance(archive_name, search.archive.Archive):
+            self.dataframe = load_from_search_archive(archive_name)
         if isinstance(archive_name, str):
             # need a filename or a dir name....

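With the new isinstance branch, an analysis Archive can now be built directly from a search Archive instead of re-reading JSON from disk. A minimal usage sketch, assuming the module layout shown by the imports in this commit; the archive name, the directory, and the load() call are assumptions, not confirmed by this diff:

```python
# Hypothetical usage sketch; archive name, directory and load() are assumed,
# only the constructor path added in this commit is taken from the diff.
import search.archive
import analysis.archive

sa = search.archive.Archive(archives_dir="archives")  # assumed directory
sa.load("nettime-l")                                   # assumed archive name

# New in this commit: passing a search.archive.Archive builds the dataframe
# via load_from_search_archive() from the in-memory archive dict.
aa = analysis.archive.Archive(sa)
print(aa.dataframe.head())
```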
View File

@@ -31,6 +31,13 @@ def table_threads_ranking(ranking_dataframe):
     return html_str

+def frame_to_dictionary_threads_ranking(ranking_dataframe):
+    results = []
+    for i, row in ranking_dataframe.iterrows():
+        d = {'date': str(i), 'subject': row['subject'], 'url': row['url'], 'from': row['from'], 'nbr-references': row['nbr-references']}
+        results.append(d)
+    return results

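frame_to_dictionary_threads_ranking flattens a ranking DataFrame (indexed by date) into JSON-serializable dicts, one per thread. A small illustration with an invented one-row frame, assuming the columns produced by the ranking query:

```python
import pandas as pd
import analysis.format

# Toy ranking frame with the columns the helper reads; all values invented.
frame = pd.DataFrame(
    {"subject": ["<nettime> example thread"],
     "url": ["https://example.org/thread"],
     "from": ["someone{AT}example.org"],
     "nbr-references": [12]},
    index=[pd.Timestamp("2017-11-01")])

print(analysis.format.frame_to_dictionary_threads_ranking(frame))
# -> [{'date': '2017-11-01 00:00:00', 'subject': '<nettime> example thread',
#      'url': 'https://example.org/thread', 'from': 'someone{AT}example.org',
#      'nbr-references': 12}]
```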
View File

@@ -11,9 +11,15 @@ def format_author(msg, archive_name):
     return msg['author_name']

 def format_from_token(from_str, sep):
+    fff = from_str
     from_addr = email.utils.parseaddr(from_str)[1]
+    fffa = email.utils.parseaddr(from_str)
     if sep not in from_addr:
         tok = from_str.split()
         try:
             at = tok.index(sep)
             from_addr = ''.join([tok[at-1], '{AT}', tok[at+1]])

@@ -22,13 +28,18 @@ def format_from_token(from_str, sep):
         except ValueError:
             print(tok)
             print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
+            print("*** " + fff)
+            print("+++")
+            print(fffa)
+            print("----")
             return None
     else:
         from_addr = from_addr.replace(sep, '{AT}')
     return from_addr.lower()

 def format_from(msg, archive_name):
     from_str = msg['from']
     if " {AT} " in from_str:
         return format_from_token(from_str, '{AT}')

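The new print calls instrument the fallback branch of format_from_token, which runs when the parsed address does not contain the separator. For reference, the token-join fallback behaves like this on an obfuscated header (the sample value is invented):

```python
# Invented example of an obfuscated 'from' header; only the token-join
# fallback from format_from_token() is reproduced here.
raw = "someone {AT} example.org (Some One)"

tok = raw.split()
at = tok.index('{AT}')
print(''.join([tok[at - 1], '{AT}', tok[at + 1]]).lower())
# -> someone{AT}example.org
```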
View File

@@ -1,6 +1,10 @@
 import logging, os, json, re
 from datetime import datetime
+import analysis.archive ## circular...
+import analysis.query
+import analysis.format

 class Archive():

     def __init__(self, archives_dir=None):

@@ -77,6 +81,25 @@ class Archive():
         return search_results

+    def threads_ranking(self, rank=5):
+        search_results = { "keyword": "thread ranking", "field": "ranking", "archive": self.archive_name, "results": [] }
+        a = analysis.archive.Archive(self)
+        q = a.query()
+        ranking = q.threads_ranking(rank=rank)
+        for i in ranking:
+            r = analysis.format.frame_to_dictionary_threads_ranking(ranking[i])
+            for h in r:
+                hit = [{ 'url': h['url'], 'subject': h['subject'], 'author_name': h['from']}]
+                search_results['results'].append({'thread': h['date'], 'nbr_hits': h['nbr-references'], 'hits': hit})
+        del a
+        del q
+        return search_results

 def get_key(kv_tuple):

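threads_ranking appears to return the same top-level envelope as search(), so the front end can render both through the same path. Roughly, the payload looks like this (every value is invented for illustration; only the keys follow the code above):

```python
# Sketch of the dict returned by Archive.threads_ranking(); values invented.
ranking_payload = {
    "keyword": "thread ranking",
    "field": "ranking",
    "archive": "nettime-l",                # assumed archive name
    "results": [
        {
            "thread": "2017-11-01 00:00:00",   # str(date index) of the ranked thread
            "nbr_hits": 12,                    # taken from the 'nbr-references' column
            "hits": [
                {"url": "https://example.org/thread",
                 "subject": "<nettime> example thread",
                 "author_name": "someone{AT}example.org"},
            ],
        },
    ],
}
```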
View File

@@ -77,4 +77,3 @@ class Archives(metaclass=Singleton):
     # return arch

View File

@@ -147,12 +147,13 @@ def searh():
     logging.info("search keyword = " + k_arg)

     for l in lists:
-        # this makes no sense...
-        # a = search.archive.Archive()
-        # a.load(l)
-        results.append(archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits))
+        if k_arg == "rank":
+            logging.info(" ranking " + l)
+            s = archives_data[l].threads_ranking()
+        else:
+            s = archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits)
+        results.append(s)

     ## -- sort results?
     search_results = sorted(results, key=get_result_key)

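With this change the existing search endpoint doubles as a ranking endpoint: sending the keyword rank returns thread rankings for the selected lists instead of a text search. A hypothetical request (the route path and query-parameter names are assumptions, not taken from this diff):

```python
import requests

# Hypothetical call against a local instance; the '/search' path and the
# 'k'/'l' parameter names are assumptions -- this commit only shows that
# k_arg == "rank" dispatches to threads_ranking() instead of search().
resp = requests.get("http://localhost:5000/search",
                    params={"k": "rank", "l": "nettime-l"})

# Assuming the route returns the sorted per-archive result objects as JSON.
for archive_result in resp.json():
    print(archive_result["archive"], len(archive_result["results"]))
```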
View File

@@ -35,12 +35,21 @@ function search_result_archive(a) {
             text: r.thread.replace('_', ' ')
         }).appendTo('#' + a.archive);
         let hits = "<ul>";
+        console.log("---")
         $.each(r.hits, function(j, h){
+            console.log(h)
             let hit = '<li><a href="' + h.url + '">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>';
             hits += hit;
         });
         hits += "</ul>";
         $('#' + r.thread + "-" + a.archive).append(hits);
+        console.log("***");
     });
 }