hits
This commit is contained in:
parent
608467fdd8
commit
464194c9b9
@ -5,6 +5,8 @@ import os, datetime, json, gzip, re
|
|||||||
import analysis.util
|
import analysis.util
|
||||||
import analysis.query
|
import analysis.query
|
||||||
|
|
||||||
|
import search.archive ## circular...
|
||||||
|
|
||||||
|
|
||||||
def filter_date(msg, archive_name):
|
def filter_date(msg, archive_name):
|
||||||
|
|
||||||
@ -131,6 +133,14 @@ def load_from_file(filename, archive_name, archive_dir, json_data=None):
|
|||||||
print('---> ' + archive_name)
|
print('---> ' + archive_name)
|
||||||
return json_data_to_pd_dataframe(threads, archive_name)
|
return json_data_to_pd_dataframe(threads, archive_name)
|
||||||
|
|
||||||
|
def load_from_search_archive(archive):
    """Build the analysis dataframe from an already-loaded search archive.

    Every value stored in ``archive.archive`` (iterated through its
    ``items()``) is collected, in iteration order, into a list that is
    handed to ``json_data_to_pd_dataframe`` together with
    ``archive.archive_name``.

    Returns whatever ``json_data_to_pd_dataframe`` returns (presumably a
    pandas DataFrame, judging by the helper's name -- TODO confirm).
    """
    # Keys are not needed; only the stored thread entries are forwarded.
    thread_entries = [entry for _key, entry in archive.archive.items()]
    return json_data_to_pd_dataframe(thread_entries, archive.archive_name)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Archive:
|
class Archive:
|
||||||
|
|
||||||
@ -140,7 +150,10 @@ class Archive:
|
|||||||
def __init__(self, archive_name, archive_dir="archives"):
|
def __init__(self, archive_name, archive_dir="archives"):
|
||||||
|
|
||||||
if isinstance(archive_name, pd.core.frame.DataFrame):
|
if isinstance(archive_name, pd.core.frame.DataFrame):
|
||||||
self.dataframe = archive_name.copy()
|
self.dataframe = archive_name ## no copies here
|
||||||
|
|
||||||
|
if isinstance(archive_name, search.archive.Archive):
|
||||||
|
self.dataframe = load_from_search_archive(archive_name)
|
||||||
|
|
||||||
if isinstance(archive_name, str):
|
if isinstance(archive_name, str):
|
||||||
# need a filename or a dir name....
|
# need a filename or a dir name....
|
||||||
|
|||||||
@ -31,6 +31,13 @@ def table_threads_ranking(ranking_dataframe):
|
|||||||
|
|
||||||
return html_str
|
return html_str
|
||||||
|
|
||||||
|
def frame_to_dictionary_threads_ranking(ranking_dataframe):
    """Convert a threads-ranking dataframe into a list of plain dictionaries.

    One dictionary is produced per row, in row order. The row's index label
    is stringified into the ``'date'`` key, and the ``'subject'``, ``'url'``,
    ``'from'`` and ``'nbr-references'`` columns are copied over unchanged
    under the same key names.

    Returns an empty list when the dataframe has no rows.
    """
    copied_columns = ('subject', 'url', 'from', 'nbr-references')
    return [
        {'date': str(index_label),
         **{column: record[column] for column in copied_columns}}
        for index_label, record in ranking_dataframe.iterrows()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,13 @@ def format_author(msg, archive_name):
|
|||||||
return msg['author_name']
|
return msg['author_name']
|
||||||
|
|
||||||
def format_from_token(from_str, sep):
|
def format_from_token(from_str, sep):
|
||||||
|
|
||||||
|
fff = from_str
|
||||||
|
|
||||||
from_addr = email.utils.parseaddr(from_str)[1]
|
from_addr = email.utils.parseaddr(from_str)[1]
|
||||||
|
|
||||||
|
fffa = email.utils.parseaddr(from_str)
|
||||||
|
|
||||||
if sep not in from_addr:
|
if sep not in from_addr:
|
||||||
tok = from_str.split()
|
tok = from_str.split()
|
||||||
try:
|
try:
|
||||||
@ -22,6 +28,11 @@ def format_from_token(from_str, sep):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
print(tok)
|
print(tok)
|
||||||
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
|
print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
|
||||||
|
print("*** " + fff)
|
||||||
|
print("+++")
|
||||||
|
print(fffa)
|
||||||
|
print("----")
|
||||||
|
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
from_addr = from_addr.replace(sep, '{AT}')
|
from_addr = from_addr.replace(sep, '{AT}')
|
||||||
|
|||||||
@ -1,6 +1,10 @@
|
|||||||
import logging, os, json, re
|
import logging, os, json, re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
import analysis.archive ## circular...
|
||||||
|
import analysis.query
|
||||||
|
import analysis.format
|
||||||
|
|
||||||
class Archive():
|
class Archive():
|
||||||
|
|
||||||
def __init__(self, archives_dir=None):
|
def __init__(self, archives_dir=None):
|
||||||
@ -77,6 +81,25 @@ class Archive():
|
|||||||
|
|
||||||
return search_results
|
return search_results
|
||||||
|
|
||||||
|
def threads_ranking(self, rank=5):
    """Return the thread ranking of this archive in search-result form.

    Wraps this object in an ``analysis.archive.Archive``, asks its query
    object for ``threads_ranking(rank=rank)``, and converts every entry of
    that ranking with ``analysis.format.frame_to_dictionary_threads_ranking``.
    Each converted row becomes one result holding exactly one hit.

    ``rank`` is forwarded unchanged to the query's ``threads_ranking``.

    Returns a dictionary with the fixed keys ``"keyword"``, ``"field"``,
    ``"archive"`` (``self.archive_name``) and ``"results"``; the last is a
    list of ``{'thread', 'nbr_hits', 'hits'}`` dictionaries.
    """
    # NOTE(review): block nesting was reconstructed from a diff view that
    # lost its indentation -- confirm against the real file.
    collected = []

    analysis_archive = analysis.archive.Archive(self)
    query = analysis_archive.query()
    ranking = query.threads_ranking(rank=rank)

    for key in ranking:
        rows = analysis.format.frame_to_dictionary_threads_ranking(ranking[key])
        collected.extend(
            {
                'thread': row['date'],
                'nbr_hits': row['nbr-references'],
                'hits': [{'url': row['url'], 'subject': row['subject'], 'author_name': row['from']}],
            }
            for row in rows
        )

    # Drop the analysis objects explicitly before returning, as the original did.
    del analysis_archive, query

    return {
        "keyword": "thread ranking",
        "field": "ranking",
        "archive": self.archive_name,
        "results": collected,
    }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_key(kv_tuple):
|
def get_key(kv_tuple):
|
||||||
|
|||||||
@ -77,4 +77,3 @@ class Archives(metaclass=Singleton):
|
|||||||
# return arch
|
# return arch
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -147,12 +147,13 @@ def searh():
|
|||||||
logging.info("search keyword = " + k_arg)
|
logging.info("search keyword = " + k_arg)
|
||||||
|
|
||||||
for l in lists:
|
for l in lists:
|
||||||
|
if k_arg == "rank":
|
||||||
|
logging.info(" ranking " + l)
|
||||||
|
s = archives_data[l].threads_ranking()
|
||||||
|
else:
|
||||||
|
s = archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits)
|
||||||
|
|
||||||
# this makes no sense...
|
results.append(s)
|
||||||
# a = search.archive.Archive()
|
|
||||||
# a.load(l)
|
|
||||||
|
|
||||||
results.append(archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits))
|
|
||||||
|
|
||||||
## -- sort results?
|
## -- sort results?
|
||||||
search_results = sorted(results, key=get_result_key)
|
search_results = sorted(results, key=get_result_key)
|
||||||
|
|||||||
@ -35,12 +35,21 @@ function search_result_archive(a) {
|
|||||||
text: r.thread.replace('_', ' ')
|
text: r.thread.replace('_', ' ')
|
||||||
}).appendTo('#' + a.archive);
|
}).appendTo('#' + a.archive);
|
||||||
let hits = "<ul>";
|
let hits = "<ul>";
|
||||||
|
|
||||||
|
console.log("---")
|
||||||
|
|
||||||
$.each(r.hits, function(j, h){
|
$.each(r.hits, function(j, h){
|
||||||
|
|
||||||
|
console.log(h)
|
||||||
|
|
||||||
let hit = '<li><a href="' + h.url+ '">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>';
|
let hit = '<li><a href="' + h.url+ '">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>';
|
||||||
hits += hit;
|
hits += hit;
|
||||||
});
|
});
|
||||||
hits += "</ul>";
|
hits += "</ul>";
|
||||||
$('#' + r.thread + "-" + a.archive).append(hits);
|
$('#' + r.thread + "-" + a.archive).append(hits);
|
||||||
|
|
||||||
|
console.log("***");
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user