hits

2017-11-24 09:52:14 +01:00 · 2017-11-24 09:52:14 +01:00 · 464194c9b9
commit 464194c9b9
parent 608467fdd8
7 changed files with 72 additions and 9 deletions
--- a/analysis/archive.py
+++ b/analysis/archive.py
@@ -5,6 +5,8 @@ import os, datetime, json, gzip, re
 import analysis.util
 import analysis.query

+import search.archive ## circular...
+

 def filter_date(msg, archive_name):
 	
@@ -131,6 +133,14 @@ def load_from_file(filename, archive_name, archive_dir, json_data=None):
 			print('---> ' + archive_name)
 			return json_data_to_pd_dataframe(threads, archive_name)

+def load_from_search_archive(archive):
+	threads = []
+	for k, v in archive.archive.items():
+		threads.append(v)
+	return json_data_to_pd_dataframe(threads, archive.archive_name)
+
+
+				

 class Archive:

@@ -140,7 +150,10 @@ class Archive:
 	def __init__(self, archive_name, archive_dir="archives"):

 		if isinstance(archive_name, pd.core.frame.DataFrame):
-			self.dataframe = archive_name.copy()
+			self.dataframe = archive_name ## no copies here
+
+		if isinstance(archive_name, search.archive.Archive):
+			self.dataframe = load_from_search_archive(archive_name)

 		if isinstance(archive_name, str):
 			# need a filename or a dir name....
--- a/analysis/format.py
+++ b/analysis/format.py
@@ -31,6 +31,13 @@ def table_threads_ranking(ranking_dataframe):

 	return html_str

+def frame_to_dictionary_threads_ranking(ranking_dataframe):
+
+	results = []
+	for i, row in ranking_dataframe.iterrows():
+		d = {'date': str(i), 'subject': row['subject'], 'url': row['url'], 'from': row['from'], 'nbr-references': row['nbr-references']}
+		results.append(d)
+	return results



--- a/analysis/util.py
+++ b/analysis/util.py
@@ -11,7 +11,13 @@ def format_author(msg, archive_name):
 	return msg['author_name']

 def format_from_token(from_str, sep):
+
+	fff = from_str
+
 	from_addr = email.utils.parseaddr(from_str)[1]
+
+	fffa = email.utils.parseaddr(from_str)
+
 	if sep not in from_addr:
 		tok = from_str.split()
 		try:
@@ -22,6 +28,11 @@ def format_from_token(from_str, sep):
 		except ValueError:
 			print(tok)
 			print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
+			print("*** " + fff)
+			print("+++")
+			print(fffa)
+			print("----")
+
 			return None
 	else:
 		from_addr = from_addr.replace(sep, '{AT}')
--- a/search/archive.py
+++ b/search/archive.py
@@ -1,6 +1,10 @@
 import logging, os, json, re
 from datetime import datetime

+import analysis.archive ## circular...
+import analysis.query
+import analysis.format
+
 class Archive():

 	def __init__(self, archives_dir=None):
@@ -77,6 +81,25 @@ class Archive():

 		return search_results

+	def threads_ranking(self, rank=5):
+
+		search_results = { "keyword": "thread ranking", "field": "ranking", "archive": self.archive_name, "results": [] }
+
+		a = analysis.archive.Archive(self)
+		q = a.query();
+
+		ranking = q.threads_ranking(rank=rank)
+
+		for i in ranking:
+			r = analysis.format.frame_to_dictionary_threads_ranking(ranking[i])
+			for h in r:
+				hit = [{ 'url': h['url'], 'subject': h['subject'], 'author_name': h['from']}]				
+				search_results['results'].append({'thread': h['date'], 'nbr_hits': h['nbr-references'], 'hits': hit})
+		del a
+		del q
+
+		return search_results
+

 				
 def get_key(kv_tuple):
--- a/www/archives.py
+++ b/www/archives.py
@@ -77,4 +77,3 @@ class Archives(metaclass=Singleton):
 		# return arch	


-
--- a/www/routes.py
+++ b/www/routes.py
@@ -147,12 +147,13 @@ def searh():
 	logging.info("search keyword = " + k_arg)

 	for l in lists:
+		if k_arg == "rank":
+			logging.info("	ranking " + l)
+			s = archives_data[l].threads_ranking()
+		else:
+			s = archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits)

-		# this makes no sense...
-		# a = search.archive.Archive()
-		# a.load(l)
-
-		results.append(archives_data[l].search(keyword=k_arg, field=f_arg, min_hits=nbr_hits))
+		results.append(s)

 	## -- sort results?
 	search_results = sorted(results, key=get_result_key)
--- a/www/static/search.js
+++ b/www/static/search.js
@@ -35,12 +35,21 @@ function search_result_archive(a) {
 			text: r.thread.replace('_', ' ')
 		}).appendTo('#' + a.archive);
 		let hits = "<ul>";
+
+		console.log("---")
+		
 		$.each(r.hits, function(j, h){
+
+			console.log(h)
+
 			let hit = '<li><a href="' + h.url+ '">' + h.subject + '</a> -- <i>' + h.author_name + '</i></li>';
 			hits += hit;
 		});
 		hits += "</ul>";
 		$('#' + r.thread + "-" + a.archive).append(hits);
+
+		console.log("***");
+
 	});
 }
				`@@ -77,4 +77,3 @@ class Archives(metaclass=Singleton):`
				`# return arch`