hits
This commit is contained in:
parent
608467fdd8
commit
464194c9b9
@ -5,6 +5,8 @@ import os, datetime, json, gzip, re
|
||||
import analysis.util
|
||||
import analysis.query
|
||||
|
||||
import search.archive ## circular...
|
||||
|
||||
|
||||
def filter_date(msg, archive_name):
|
||||
|
||||
@ -131,6 +133,14 @@ def load_from_file(filename, archive_name, archive_dir, json_data=None):
|
||||
print('---> ' + archive_name)
|
||||
return json_data_to_pd_dataframe(threads, archive_name)
|
||||
|
||||
def load_from_search_archive(archive):
    """Build the analysis dataframe from an already-loaded search archive.

    Every value stored in ``archive.archive`` is collected (the keys are
    ignored) and the resulting list is passed, together with
    ``archive.archive_name``, to ``json_data_to_pd_dataframe``, whose result
    is returned unchanged.
    """
    # Only the values are needed; the mapping keys are discarded.
    thread_list = [thread for _key, thread in archive.archive.items()]
    return json_data_to_pd_dataframe(thread_list, archive.archive_name)
|
||||
|
||||
|
||||
|
||||
|
||||
class Archive:
|
||||
|
||||
@ -140,7 +150,10 @@ class Archive:
|
||||
def __init__(self, archive_name, archive_dir="archives"):
|
||||
|
||||
if isinstance(archive_name, pd.core.frame.DataFrame):
|
||||
self.dataframe = archive_name.copy()
|
||||
self.dataframe = archive_name ## no copies here
|
||||
|
||||
if isinstance(archive_name, search.archive.Archive):
|
||||
self.dataframe = load_from_search_archive(archive_name)
|
||||
|
||||
if isinstance(archive_name, str):
|
||||
# need a filename or a dir name....
|
||||
|
||||
@ -31,6 +31,13 @@ def table_threads_ranking(ranking_dataframe):
|
||||
|
||||
return html_str
|
||||
|
||||
def frame_to_dictionary_threads_ranking(ranking_dataframe):
    """Convert a thread-ranking dataframe into a list of plain dictionaries.

    One dictionary is produced per row, in row order. Each holds ``date``
    (the row's index label converted with ``str``) followed by the values of
    the ``subject``, ``url``, ``from`` and ``nbr-references`` columns.
    """
    copied_columns = ('subject', 'url', 'from', 'nbr-references')
    return [
        {'date': str(label), **{column: record[column] for column in copied_columns}}
        for label, record in ranking_dataframe.iterrows()
    ]
|
||||
|
||||
|
||||
|
||||
|
||||
@ -11,7 +11,13 @@ def format_author(msg, archive_name):
|
||||
return msg['author_name']
|
||||
|
||||
def format_from_token(from_str, sep):
|
||||
|
||||
fff = from_str
|
||||
|
||||
from_addr = email.utils.parseaddr(from_str)[1]
|
||||
|
||||
fffa = email.utils.parseaddr(from_str)
|
||||
|
||||
if sep not in from_addr:
|
||||
tok = from_str.split()
|
||||
try:
|
||||
@ -22,6 +28,11 @@ def format_from_token(from_str, sep):
|
||||
except ValueError:
|
||||
print(tok)
|
||||
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
|
||||
print("*** " + fff)
|
||||
print("+++")
|
||||
print(fffa)
|
||||
print("----")
|
||||
|
||||
return None
|
||||
else:
|
||||
from_addr = from_addr.replace(sep, '{AT}')
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
import logging, os, json, re
|
||||
from datetime import datetime
|
||||
|
||||
import analysis.archive ## circular...
|
||||
import analysis.query
|
||||
import analysis.format
|
||||
|
||||
class Archive():
|
||||
|
||||
def __init__(self, archives_dir=None):
|
||||
@ -77,6 +81,25 @@ class Archive():
|
||||
|
||||
return search_results
|
||||
|
||||
def threads_ranking(self, rank=5):
    """Return this archive's thread ranking in the search-results layout.

    The archive is wrapped in ``analysis.archive.Archive`` and its query
    object's ``threads_ranking(rank=rank)`` is called. Each entry of that
    ranking is flattened with
    ``analysis.format.frame_to_dictionary_threads_ranking``, and every
    resulting row becomes one item of ``results`` with a single-element
    ``hits`` list.

    Returns a dict with the keys ``keyword``, ``field``, ``archive`` and
    ``results``.
    """
    search_results = {
        "keyword": "thread ranking",
        "field": "ranking",
        "archive": self.archive_name,
        "results": [],
    }
    collected = search_results['results']

    analyzer = analysis.archive.Archive(self)
    ranking_query = analyzer.query()
    ranking = ranking_query.threads_ranking(rank=rank)

    # NOTE(review): `ranking` is iterated by key and indexed with that key,
    # so it is presumably a mapping of dataframes -- confirm against
    # analysis.query.
    for key in ranking:
        rows = analysis.format.frame_to_dictionary_threads_ranking(ranking[key])
        for row in rows:
            single_hit = [{
                'url': row['url'],
                'subject': row['subject'],
                'author_name': row['from'],
            }]
            collected.append({
                'thread': row['date'],
                'nbr_hits': row['nbr-references'],
                'hits': single_hit,
            })

    # Drop the analysis objects explicitly before returning.
    del analyzer
    del ranking_query

    return search_results
|
||||
|
||||
|
||||
|
||||
def get_key(kv_tuple):
|
||||
|
||||
@ -77,4 +77,3 @@ class Archives(metaclass=Singleton):
|
||||
# return arch
|
||||
|
||||
|
||||
|
||||
|
||||
@ -147,12 +147,13 @@ def searh():
|
||||
logging.info("search keyword = " + k_arg)
|
||||
|
||||
for l in lists:
|
||||
if k_arg == "rank":
|
||||
logging.info(" ranking " + l)
|
||||
s = archives_data[l].threads_ranking()
|
||||
else:
|
||||
s = archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits)
|
||||
|
||||
# this makes no sense...
|
||||
# a = search.archive.Archive()
|
||||
# a.load(l)
|
||||
|
||||
results.append(archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits))
|
||||
results.append(s)
|
||||
|
||||
## -- sort results?
|
||||
search_results = sorted(results, key=get_result_key)
|
||||
|
||||
@ -35,12 +35,21 @@ function search_result_archive(a) {
|
||||
text: r.thread.replace('_', ' ')
|
||||
}).appendTo('#' + a.archive);
|
||||
let hits = "<ul>";
|
||||
|
||||
console.log("---")
|
||||
|
||||
$.each(r.hits, function(j, h){
|
||||
|
||||
console.log(h)
|
||||
|
||||
let hit = '<li><a href="' + h.url+ '">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>';
|
||||
hits += hit;
|
||||
});
|
||||
hits += "</ul>";
|
||||
$('#' + r.thread + "-" + a.archive).append(hits);
|
||||
|
||||
console.log("***");
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user