index + db
This commit is contained in:
parent
afc71795d1
commit
56aab9e545
44
collate_indexes.py
Normal file
44
collate_indexes.py
Normal file
@ -0,0 +1,44 @@
|
||||
import argparse, os, glob, json, logging
|
||||
import config
|
||||
|
||||
def list_all(d, ext):
    """Return the paths of all files directly inside ``d`` ending in ``.ext``.

    Args:
        d: Directory to search (not recursive).
        ext: File extension without the leading dot, e.g. ``"js"``.

    Returns:
        The list of matching paths as produced by ``glob.glob``, or ``None``
        (after logging an error) when ``d`` is not an existing directory.
    """
    if not os.path.isdir(d):
        logging.error(d + " is not a valid directory.")
        return None

    # glob.glob already returns a fresh list; copying it element by element
    # through a comprehension added nothing.
    return glob.glob(os.path.join(d, "*." + ext))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Collate every per-file keyword index (*.js, JSON content) found in the
    # index directory into the master index and print the result as JSON.

    # list_all() returns None for an invalid directory; fall back to an empty
    # list so the loop below is skipped instead of raising TypeError.
    files = list_all(config.index['path'], 'js') or []

    index = {}

    # Start from the existing master index when there is one, so entries
    # already collated are kept.
    master_fn = os.path.join(config.index['path'], config.index['master'])
    if os.path.isfile(master_fn):
        with open(master_fn) as master_fp:
            try:
                index = json.load(master_fp)
            except ValueError as e:
                # Still best-effort (an unreadable master means starting from
                # scratch), but no longer silent about it.
                logging.warning("Could not parse " + master_fn + ": " + str(e))

    for f in files:
        logging.info("Reading : " + f)

        with open(f) as fp:
            d = json.load(fp)

        selected = d['selected']

        for s, vv in selected.items():
            # A new keyword starts with itself as its only regex.
            entry = index.setdefault(s, {'regex': [s], 'indx': []})
            # no duplicates
            for v in vv:
                if v not in entry['indx']:
                    entry['indx'].append(v)

    print(json.dumps(index, indent=4, sort_keys=True, ensure_ascii=False))
|
||||
27
create_db.py
Normal file
27
create_db.py
Normal file
@ -0,0 +1,27 @@
|
||||
import os, glob, logging
|
||||
from db import db
|
||||
import config
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
def list_all(d, ext):
    """Return the paths of all files directly inside ``d`` ending in ``.ext``.

    Args:
        d: Directory to search (not recursive).
        ext: File extension without the leading dot, e.g. ``"xml"``.

    Returns:
        The list of matching paths as produced by ``glob.glob``, or ``None``
        (after logging an error) when ``d`` is not an existing directory.
    """
    if not os.path.isdir(d):
        logging.error(d + " is not a valid directory.")
        return None

    # glob.glob already returns a fresh list; copying it element by element
    # through a comprehension added nothing.
    return glob.glob(os.path.join(d, "*." + ext))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Create the digest table and load every XML chapter file into it.

    # DB is a context manager: using it as one closes the connection on exit
    # (the original never closed it). The instance is bound to its own name
    # instead of rebinding ``db``, which shadowed the imported module.
    with db.DB(config.list_server_busy_db) as database:

        database.create_db()

        # list_all() returns None for an invalid directory; treat that as
        # "nothing to insert" rather than crashing on iteration.
        xml_files = list_all(config.xml['path'], 'xml') or []

        urls_index_file = os.path.join(config.index['path'], config.index['urls'])
        for x in xml_files:
            database.insert_db(x, urls_index_file)
|
||||
|
||||
0
db/__init__.py
Normal file
0
db/__init__.py
Normal file
176
db/db.py
Normal file
176
db/db.py
Normal file
@ -0,0 +1,176 @@
|
||||
import mysql.connector as mariadb
|
||||
import os, json, glob, logging
|
||||
from lxml import etree as et
|
||||
import db.sql, db.utils
|
||||
|
||||
class DB:
    """Access layer for the ``full_digest_rescheduled`` table.

    Wraps one MariaDB connection opened through ``db.utils.connect_db`` and
    offers table creation, insertion of the mails of an XML chapter file, and
    full-text searches. Can be used as a context manager, in which case the
    connection is closed on exit.
    """

    # Class-level default so the attribute exists even before __init__ ran.
    db_con = None

    # Keys of the dicts returned by the search methods, in the column order
    # selected by the search query templates of db.sql.
    _RESULT_KEYS = ('nbr', 'from', 'to', 'subject', 'date', 'url')

    def __init__(self, config):
        """Connect using ``config`` (keys: database, host, user, password).

        ``db_con`` is left as None when the connection cannot be established.
        """
        self.db_con = db.utils.connect_db(config['database'], config['host'], config['user'], config['password'])

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised inside the ``with`` body propagate.
        if self.db_con is not None:
            self.db_con.close()

    def _ensure_connection(self, config=None):
        """Return True if connected, trying ``config`` first when not."""
        if self.db_con is None and config is not None:
            self.db_con = db.utils.connect_db(config['database'], config['host'], config['user'], config['password'])

        if self.db_con is None:
            logging.warning(" - no connection... Aborting.")
            return False
        return True

    def create_db(self, config=None):
        """Create the ``full_digest_rescheduled`` table.

        Args:
            config: Optional connection settings, used only when no
                connection is open yet.

        Database errors are logged, not raised.
        """
        logging.info("creating table 'full_digest_rescheduled'")
        if not self._ensure_connection(config):
            return

        # Initialised up front so ``finally`` cannot hit an unbound name when
        # cursor() itself fails.
        cursor = None
        try:
            cursor = self.db_con.cursor()
            cursor.execute(db.sql.CREATE)
        except mariadb.Error as error:
            logging.error("Error: {}".format(error))
        finally:
            if cursor is not None:
                cursor.close()

        logging.info(" - done.")

    def insert_db(self, xml_file, urls_index_file, config=None):
        """Insert every ``mails/mail`` element of ``xml_file`` into the table.

        Args:
            xml_file: Chapter file; the part of its basename before the first
                dot is prefixed to each mail number (``ch.nbr``).
            urls_index_file: JSON file mapping ``ch.nbr`` to the mail's URL.
                Mails without an entry are skipped.
            config: Optional connection settings, used only when no
                connection is open yet.

        Mails whose date cannot be parsed, duplicates (MySQL error 1062) and
        other failing inserts are logged and skipped; everything else is
        committed once at the end. Returns None.
        """
        if not self._ensure_connection(config):
            return

        if not os.path.isfile(xml_file):
            # The original referenced an undefined name ``f`` here and died
            # with NameError instead of reporting the bad path.
            logging.error(xml_file + " is not a valid file.")
            return None

        if not os.path.isfile(urls_index_file):
            logging.error(urls_index_file + " is not a valid file.")
            return None

        with open(urls_index_file) as fp:
            urls = json.load(fp)

        ch = os.path.basename(xml_file).split('.')[0]

        root = et.parse(xml_file).getroot()

        logging.info("-----------------")
        logging.info(os.path.basename(xml_file))
        logging.info("-----------------")

        cursor = self.db_con.cursor()
        try:
            for m in root.findall('mails/mail'):

                nbr_str = m.find('nbr').text
                to_str = m.find('to').text
                date_str = m.find('date').text
                from_str = m.find('from').text
                subject_str = m.find('subject').text
                content_str = m.find('content').text

                # format nbr
                nbr_str = ch + '.' + nbr_str

                if nbr_str not in urls:
                    logging.warning(nbr_str + " is not in urls... skipping.")
                    continue

                url = urls[nbr_str]

                date = db.utils.format_date(date_str)
                if date is None:
                    # format() instead of '+' so a missing (None) field
                    # cannot turn the warning itself into a TypeError.
                    logging.warning("null date: {} - {} - {}".format(nbr_str, date_str, from_str))
                    logging.warning("continuing...")
                    continue

                # aaarrrgghhh
                if to_str == "n/a":
                    to_str = "syndicate@aec.at"

                # (nbr_, author_name_, to_, subject_, date_, content_, url_)
                row = (nbr_str, from_str, to_str, subject_str, date, content_str, url)

                try:
                    logging.info("inserting " + nbr_str)
                    cursor.execute(db.sql.INSERT, row)
                    # The return value of execute() is not a row count, so the
                    # original ``r == 0`` check could never fire; rowcount is
                    # the documented place to look.
                    if cursor.rowcount == 0:
                        logging.warning("error no insert...")
                        logging.warning(" - ".join(str(v) for v in row))
                except mariadb.Error as error:
                    if error.errno == 1062:
                        logging.info("+++db+++ duplicate")
                    else:
                        logging.warning("Error...")
                        # str() each field: ``date`` is not a string, and
                        # concatenating it with '+' raised TypeError.
                        logging.warning(" - ".join(str(v) for v in row))
                    continue

            self.db_con.commit()
        finally:
            cursor.close()

    def _run_search(self, template, term):
        """Execute a db.sql search template for ``term`` and collect the rows.

        Returns a list of dicts keyed by ``_RESULT_KEYS``, or None after
        logging when the database reports an error.
        """
        cursor = None
        try:
            cursor = self.db_con.cursor(buffered=True)
            # The templates embed the term as a quoted '{}' placeholder. Swap
            # it for a bound parameter so quotes in the term can neither break
            # nor inject into the statement.
            cursor.execute(template.replace("'{}'", "%s"), (term,))
            return [dict(zip(self._RESULT_KEYS, row)) for row in cursor]
        except mariadb.Error as error:
            logging.error("Error: {}".format(error))
            return None
        finally:
            if cursor is not None:
                cursor.close()

    def content_search(self, term, bool=True):
        """Full-text search of ``term`` in mail subjects and contents.

        Args:
            term: Search expression handed to MATCH ... AGAINST.
            bool: True for boolean mode, False for natural-language mode.
                (The name shadows the builtin; kept for existing callers.)

        Returns:
            A list of dicts with keys nbr, from, to, subject, date, url, or
            None when there is no connection or the query fails.
        """
        if self.db_con is None:
            logging.warning("Not connection to database...")
            return None

        # The original formatted a nonexistent ``self.archive_name`` into the
        # template and referenced CONTENT_QUERY, which db.sql does not define.
        template = db.sql.CONTENT_QUERY_BOOLEAN if bool else db.sql.CONTENT_QUERY_NL
        return self._run_search(template, term)

    def from_search(self, term, bool=True):
        """Full-text search of ``term`` in author names.

        Args:
            term: Search expression handed to MATCH ... AGAINST.
            bool: True for boolean mode, False for natural-language mode.

        Returns:
            A list of dicts with keys nbr, from, to, subject, date, url, or
            None when there is no connection or the query fails.

        NOTE(review): the original could never return (it referenced an
        undefined ``archive`` module and unpacked 5 names from a 6-column
        query); the result shape now mirrors content_search - confirm this is
        what callers want.
        """
        if self.db_con is None:
            logging.warning("Not connection to database...")
            return None

        template = db.sql.FROM_QUERY_BOOLEAN if bool else db.sql.FROM_QUERY_NL
        return self._run_search(template, term)
|
||||
|
||||
|
||||
26
db/listservs.py
Normal file
26
db/listservs.py
Normal file
@ -0,0 +1,26 @@
|
||||
import mysql.connector as mariadb
|
||||
import dateparser, logging
|
||||
import db.sql, db.utils
|
||||
|
||||
|
||||
def query_url(db_con, date_str, from_str, table_str):
    """Look up the archive URL(s) of a mail by author and date.

    Args:
        db_con: Open database connection.
        date_str: Raw date header; normalised with ``db.utils.format_date``.
        from_str: Raw from header; normalised with ``db.utils.format_author``.
        table_str: Name of the table to query. It is inserted into the SQL
            text as-is (identifiers cannot be bound), so it must come from
            trusted code, never from user input.

    Returns:
        A list with the ``url_`` of every matching row (possibly empty), or
        None when the query fails.
    """
    d = db.utils.format_date(date_str)
    auth = db.utils.format_author(from_str)

    # Initialised up front so ``finally`` cannot hit an unbound name when
    # cursor() itself fails.
    cursor = None
    try:
        cursor = db_con.cursor(buffered=True)
        # Bind author and date as parameters instead of formatting them into
        # the statement: author names routinely contain quotes, which broke
        # (or could inject into) the string-built query. Only the table name
        # is still formatted in.
        query = db.sql.URL_QUERY.replace('"{}"', '%s').format(table_str)
        cursor.execute(query, (auth, d))
        return [u[0] for u in cursor]
    except mariadb.Error as error:
        logging.error("Mariadb error - query_url")
        logging.error(error)
        return None
    except Exception as e:
        # Still best-effort, but say what went wrong.
        logging.error("Error - query_url")
        logging.error(e)
        return None
    finally:
        if cursor is not None:
            cursor.close()
|
||||
34
db/sql.py
Normal file
34
db/sql.py
Normal file
@ -0,0 +1,34 @@
|
||||
# SQL statements for the `full_digest_rescheduled` table and the listserv
# archive tables.

# IF NOT EXISTS makes table creation idempotent, so re-running the creation
# script on an existing database no longer fails with "table already exists".
CREATE = ("CREATE TABLE IF NOT EXISTS `full_digest_rescheduled` ("
          "`nbr_` varchar(20) NOT NULL,"
          "`author_name_` varchar(200) NOT NULL,"
          "`to_` varchar(60) NOT NULL,"
          "`subject_` varchar(500) NOT NULL,"
          "`date_` datetime NOT NULL,"
          "`content_` mediumtext NOT NULL,"
          "`url_` varchar(100) NOT NULL,"
          "PRIMARY KEY(`nbr_`),"
          "FULLTEXT (`subject_`, `content_`),"
          "FULLTEXT (`author_name_`)"
          ") ENGINE = InnoDB;")

# Parameterised insert: pass the seven values as the second argument of
# cursor.execute().
INSERT = ("INSERT INTO full_digest_rescheduled"
          "(nbr_, author_name_, to_, subject_, date_, content_, url_) "
          "VALUES (%s, %s, %s, %s, %s, %s, %s)")

# NOTE(review): the templates below take their values through str.format into
# quoted '{}' / "{}" slots. Values containing quotes break the statement and
# untrusted values can inject SQL - callers should bind values as parameters.

CONTENT_QUERY_BOOLEAN = ("SELECT nbr_, author_name_, to_, subject_, date_, url_ from full_digest_rescheduled "
                         "WHERE MATCH(subject_, content_) AGAINST('{}' IN BOOLEAN MODE) ORDER BY date_")

CONTENT_QUERY_NL = ("SELECT nbr_, author_name_, to_, subject_, date_, url_ from full_digest_rescheduled "
                    "WHERE MATCH(subject_, content_) AGAINST('{}') ORDER BY date_")

FROM_QUERY_BOOLEAN = ("SELECT nbr_, author_name_, to_, subject_, date_, url_ from full_digest_rescheduled "
                      "WHERE MATCH(author_name_) AGAINST('{}' IN BOOLEAN MODE) ORDER BY date_")

FROM_QUERY_NL = ("SELECT nbr_, author_name_, to_, subject_, date_, url_ from full_digest_rescheduled "
                 "WHERE MATCH(author_name_) AGAINST('{}') ORDER BY date_")

# Placeholders, in order: table name, author name, date.
URL_QUERY = ('SELECT url_ FROM {} '
             'WHERE author_name_="{}" AND date_="{}"')

SHOW_TABLE = "show tables"
|
||||
125
db/utils.py
Normal file
125
db/utils.py
Normal file
@ -0,0 +1,125 @@
|
||||
import email.utils
import logging

import dateparser
import mysql.connector as mariadb

import db.sql
|
||||
|
||||
def connect_db(database, host, user, password):
    """Open a connection to ``database`` on ``host``.

    Returns:
        The connection object, or None when connecting fails. The error is
        logged, with an extra hint when the database does not exist (MySQL
        error 1049).
    """
    # The original returned from a ``finally`` clause: on failure ``con`` was
    # never bound, so instead of the intended None the caller got an
    # UnboundLocalError.
    try:
        return mariadb.connect(host=host, user=user, password=password, database=database)
    except mariadb.Error as error:
        logging.error("Error: {}".format(error))
        if error.errno == 1049:
            logging.error("Database " + database + " does not exist.")
        return None
|
||||
|
||||
def list_all_tables(db_con):
    """Return the names of all tables visible through ``db_con``.

    Returns:
        A list holding the first column of every row returned by the
        SHOW_TABLE statement, or None (after logging) on a database error.
    """
    # Initialised up front so ``finally`` cannot hit an unbound name when
    # cursor() itself fails.
    cursor = None
    try:
        cursor = db_con.cursor()
        cursor.execute(db.sql.SHOW_TABLE)
        return [t[0] for t in cursor]
    except mariadb.Error as error:
        logging.error("Error: {}".format(error))
        return None
    finally:
        if cursor is not None:
            cursor.close()
|
||||
|
||||
def format_date(date_str):
    """Parse a mail date header, repairing known malformed variants.

    The string is first handed to ``dateparser.parse`` unchanged. When that
    fails, successive repairs are tried, re-parsing after each stage:

    1. drop a trailing parenthesised comment;
    2. fix or drop odd numeric suffixes and misspelt day names;
    3. drop a ``GMT...`` suffix / rewrite ``METDST`` as ``MET``.

    Args:
        date_str: Raw date string; may be None or empty.

    Returns:
        Whatever ``dateparser.parse`` produced for the first variant it
        accepts, or None when no variant can be parsed.
    """
    # Nothing to parse. Without this guard an empty or whitespace-only string
    # crashed further down with IndexError on toks[-1].
    if not date_str or not date_str.strip():
        return None

    date_time = dateparser.parse(date_str)
    if date_time is not None:
        return date_time

    if '(' in date_str:
        date_str = date_str.split('(')[0].rstrip()
        # Re-parse only when the string actually changed.
        date_time = dateparser.parse(date_str)
        if date_time is not None:
            return date_time

    # random stuff...
    toks = date_str.split()
    if not toks:
        # e.g. the whole header was a parenthesised comment
        return None
    last = toks[-1]

    # NOTE(review): the source's indentation is lost; the three checks below
    # are assumed to all belong to this length test - confirm against the
    # real file.
    if len(last) == 5 or len(last) == 4:
        # ex. Thu, 24 Jan 2002 15:21:31 -0000
        if last in ['+0000', '-0000', '0000']:
            date_str = date_str[:-5]
        # ex. Fri, 25 Jan 2002 13:21:49 +1050
        elif last[-2] == '5':
            date_str = date_str[:-2] + '3' + date_str[-1:]

        if last[-1] != '0':
            # ex. 'Fri,', '20', 'Jun', '1997', '02:58:59', '-0005'
            date_str = date_str[:-5]

    if 'Fru' in toks[0]:
        date_str = date_str.replace('Fru', 'Fri')
    elif 'Thur' in toks[0]:
        date_str = date_str.replace('Thur', 'Thu')

    date_time = dateparser.parse(date_str)
    if date_time is not None:
        return date_time

    if 'GMT' in date_str:
        # ex. 'Mon,', '15', 'Jan', '96', '02:55', 'GMT+0100'
        date_str = date_str.split('GMT')[0].rstrip()

    if 'METDST' in last:
        # ex. 'Sat,', '3', 'May', '97', '21:07', 'METDST'
        date_str = date_str.replace('METDST', 'MET')

    # Last attempt: the parser's answer (a date or None) is the result.
    return dateparser.parse(date_str)
|
||||
|
||||
def format_author(author_str):
    """Reduce a raw ``From`` header value to the author's display name.

    Steps, in order:

    1. cut a "by way of ..." forwarding note;
    2. for ``Name (comment)`` / ``addr (Name)`` / ``Name <addr>`` forms, keep
       the text before ``(`` when the value holds no address, otherwise let
       ``email.utils.parseaddr`` extract the real-name part;
    3. cut everything from the first `` ,`` on.

    Args:
        author_str: Raw author string.

    Returns:
        The cleaned author string.
    """
    if "by way of" in author_str:
        toks = author_str.split("by way of")
        if toks[0] == "":
            # Nothing precedes the note. The original called an undefined
            # format_from(msg) here and died with NameError; fall back to the
            # name that follows "by way of".
            # NOTE(review): confirm this fallback is the wanted author.
            author_str = toks[1].strip()
        elif toks[0][-1] == "(":
            # ex. Name (by way of Someone) -> drop the dangling "("
            author_str = toks[0][:-1].strip()
        else:
            author_str = toks[0]

    if ("(" in author_str) or ("<" in author_str):
        # ex. zx {AT} xyz.net (Michel Foucault) OR Michel Foucault (c'estcommeca.com) OR Michel Foucault <zx {AT} xyz.net>
        # Obfuscated addresses ("{AT}", " at ") count as addresses too.
        if '@' not in author_str.lower().replace('{at}', '@').replace(' at ', '@'):
            author_str = author_str.split('(')[0].strip()
        else:
            author_str = email.utils.parseaddr(author_str)[0]

    if " ," in author_str:
        # nettime's_roving_reporter , thing.net {AT} bbs.thing.net
        author_str = author_str.split(' ,')[0]

    return author_str
|
||||
@ -25,6 +25,15 @@
|
||||
|
||||
- Ch 9.List talking to List
|
||||
+ 13.0 choose-your-own adventure: a brief history of nettim -> choose-your-own adventure: a brief history of nettime
|
||||
+ this is out... dunno why???
|
||||
<nbr>24.0-p.471</nbr>
|
||||
<subject><nettime> what is going on, on nettime?</subject>
|
||||
<from>geert</from>
|
||||
<to>nettime-l@bbs.thing.net</to>
|
||||
<date>Tue, 17 Aug 2004 11:14:50 +0000</date>
|
||||
<content>
|
||||
Well... Alan, nettime being closed off because of lacking nn postings. Many
|
||||
|
||||
|
||||
- Ch 11. CODE
|
||||
+ 5.5 should be 6.5
|
||||
|
||||
4034
index/index.master
Normal file
4034
index/index.master
Normal file
File diff suppressed because it is too large
Load Diff
1229
index/urls.master
Normal file
1229
index/urls.master
Normal file
File diff suppressed because it is too large
Load Diff
153
index_url.py
Normal file
153
index_url.py
Normal file
@ -0,0 +1,153 @@
|
||||
import argparse, os, glob, json, logging, collections
|
||||
import db.utils, db.listservs
|
||||
from lxml import etree as et
|
||||
import mysql.connector as mariadb
|
||||
import config
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
def list_all(d, ext):
    """Return the paths of all files directly inside ``d`` ending in ``.ext``.

    Args:
        d: Directory to search (not recursive).
        ext: File extension without the leading dot, e.g. ``"xml"``.

    Returns:
        The list of matching paths as produced by ``glob.glob``, or ``None``
        (after logging an error) when ``d`` is not an existing directory.
    """
    if not os.path.isdir(d):
        logging.error(d + " is not a valid directory.")
        return None

    # glob.glob already returns a fresh list; copying it element by element
    # through a comprehension added nothing.
    return glob.glob(os.path.join(d, "*." + ext))
|
||||
|
||||
|
||||
def index(f, indx=None):
    """Find the archive URL of every mail in XML file ``f``.

    Each ``mails/mail`` element is keyed as ``<chapter>.<nbr>``, where the
    chapter is the part of the file's basename before the first dot. The
    mail's ``<to>`` address selects the listserv table that is queried by
    author and date.

    Args:
        f: Path of the XML file (named like ``3.Network.xml``).
        indx: Optional existing ``{key: url}`` dict. It is updated in place
            and keys already present are not queried again.

    Returns:
        The updated index dict, or None when ``f`` is not a file, the
        database cannot be reached, or its tables cannot be listed.
    """
    if not os.path.isfile(f):
        logging.error(f + " is not a valid file.")
        return None

    conf = config.listservs_db
    db_con = db.utils.connect_db(conf['database'], conf['host'], conf['user'], conf['password'])

    if db_con is None:
        logging.error("Can not connect to db " + conf['database'] + " @ " + conf['host'])
        return None

    # Everything after a successful connect runs under try/finally so the
    # connection is closed on every path; the original leaked it when the XML
    # file failed to parse.
    try:
        tables = db.utils.list_all_tables(db_con)
        if tables is None:
            logging.error("There are no table in db " + conf['database'] + " @ " + conf['host'])
            return None

        # filename should be of the type: N.xxxx.xml
        # ex: 3.Network.xml
        ch = os.path.basename(f).split('.')[0]

        if indx is None:
            indx = {}

        root = et.parse(f).getroot()

        # <to> address -> listserv table (the original literal listed
        # 'nettime-l@desk.nl' twice).
        to_table_map = {
            '<new-media-curating@jiscmail.ac.uk>': 'crumb',
            'spectre@mikrolisten.de': 'spectre',
            '<empyre@lists.cofa.unsw.edu.au>': 'empyre',
            'nettime-bold@nettime.org': 'nettime_bold',
            'nettime-l@desk.nl': 'nettime_l',
            'mettime-l-temp@material.net': 'nettime_l',
            'nettime-l@bbs.thing.net': 'nettime_l',
            'nettime-l@kein.org': 'nettime_l',
            'oldboys@lists.ccc.de': 'oldboys',
            'n/a': 'syndicate'
        }

        logging.info("-----------------")
        logging.info(os.path.basename(f))
        logging.info("-----------------")

        for m in root.findall('mails/mail'):

            nbr_str = m.find('nbr').text
            to_str = m.find('to').text
            date_str = m.find('date').text
            from_str = m.find('from').text

            # format nbr
            nbr_str = ch + '.' + nbr_str

            if nbr_str in indx:
                logging.warning(nbr_str + " is already indexed... skipping")
                continue

            table = to_table_map.get(to_str)
            if table is None:
                # An unmapped recipient used to raise KeyError and abort the
                # whole file; skip just this mail instead.
                logging.warning("No table for recipient {} ({})... skipping".format(to_str, nbr_str))
                continue

            logging.info(nbr_str + " - [" + table + "] - " + date_str + " - " + from_str)

            # db_con, date_str, from_str, db_str
            urls = db.listservs.query_url(db_con, date_str, from_str, table)

            if urls is None or len(urls) == 0:
                logging.warning("No url for " + nbr_str)
                continue

            if len(urls) > 1:
                logging.warning("More than one url for " + nbr_str + "... taking first...")

            indx[nbr_str] = urls[0]

        return indx

    finally:
        db_con.close()
|
||||
|
||||
def parse_nbr(nbr_str):
    """Turn a dotted mail number into a tuple of ints usable as a sort key.

    Anything from the first ``-`` on (e.g. a ``-p.471`` suffix) is ignored.

    Args:
        nbr_str: Number such as ``"16.24.0"`` or ``"24.0-p.471"``.

    Returns:
        Tuple of ints, e.g. ``(24, 0)``.

    Raises:
        ValueError: If a dotted component is not an integer.
    """
    # split() returns the whole string when there is no '-', so no separate
    # membership test is needed.
    numeric_part = nbr_str.split('-')[0]
    return tuple(int(j) for j in numeric_part.split('.'))
|
||||
|
||||
|
||||
def save(fn, ind):
    """Write index ``ind`` to file ``fn`` as JSON, keys in numeric order.

    Args:
        fn: Path of the file to (over)write.
        ind: ``{number: url}`` dict; keys are ordered with ``parse_nbr``.

    Raises:
        ValueError: If a key cannot be parsed by ``parse_nbr``. The existing
            file is left untouched in that case.
    """
    logging.info("savig work")
    # sort keys
    # Sort BEFORE opening: open(..., 'w') truncates the file, so a key that
    # fails to parse used to wipe the previously saved index.
    ordered = collections.OrderedDict(sorted(ind.items(), key=lambda x: parse_nbr(x[0])))
    with open(fn, 'w') as fp:
        json.dump(ordered, fp, indent=4, ensure_ascii=False)
    logging.info("done.")
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Usage: index_url.py <file.xml>  -> print the url index of one file
    #        index_url.py all         -> update the saved url index from
    #                                    every xml file

    p = argparse.ArgumentParser(description='Extract urls for mails in xml file')
    p.add_argument('file', metavar="f", help="xml file to extract urls for")

    ind = {}
    args = p.parse_args()
    if args.file == "all":
        urls = os.path.join(config.index['path'], config.index['urls'])

        # Load outside the try block: an interrupt or error while reading
        # must not reach save(), which would overwrite the saved index with
        # the still-empty dict.
        with open(urls) as fp:
            ind = json.load(fp)

        # list_all() returns None for an invalid directory.
        xml_files = list_all(config.xml['path'], 'xml') or []

        try:
            for x in xml_files:
                if index(x, indx=ind) is None:
                    logging.error("Error processing - " + x)
        except KeyboardInterrupt:
            # Ctrl-C is an expected way to stop a long run; the work done so
            # far is saved below.
            logging.warning("interrupted")
        finally:
            # Persist whatever was collected on success, on Ctrl-C and on
            # unexpected errors alike (errors still propagate afterwards).
            save(urls, ind)
    else:
        ind = index(args.file)
        print(json.dumps(ind, indent=4, sort_keys=True, ensure_ascii=False))
|
||||
|
||||
|
||||
|
||||
@ -31,28 +31,6 @@ def read_index(d, fn):
|
||||
|
||||
return index_data
|
||||
|
||||
# def add_selected_kw_index(d, fn, kw):
|
||||
# fp = os.path.join(d, fn)
|
||||
# if not os.path.isfile(fp):
|
||||
# return False
|
||||
|
||||
# with open(fp) as f:
|
||||
# index_data = json.load(f)
|
||||
|
||||
# if kw not in index_data['orphan']:
|
||||
# return False
|
||||
|
||||
# v = index_data['orphan'].pop(kw)
|
||||
# if kw not in index_data['selected']:
|
||||
# index_data['selected'][kw] = []
|
||||
|
||||
# index_data['selected'][kw] += v
|
||||
|
||||
# with open(fp, 'w') as fout:
|
||||
# json.dump(index_data, fout, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
|
||||
# return True
|
||||
|
||||
|
||||
def modify_selected_kw_index(d, fn, kw, action="add"):
|
||||
fp = os.path.join(d, fn)
|
||||
@ -85,6 +63,15 @@ def modify_selected_kw_index(d, fn, kw, action="add"):
|
||||
|
||||
return True
|
||||
|
||||
def read_index_master(d, fn):
    """Load the master index JSON file ``fn`` from directory ``d``.

    Returns:
        The parsed content with key order preserved (``OrderedDict``), or
        None when the file does not exist.
    """
    fp = os.path.join(d, fn)
    if not os.path.isfile(fp):
        # None rather than False: the /index-master view tests the result with
        # ``is not None``, so False slipped through and reached the template.
        return None

    with open(fp) as f:
        return json.load(f, object_pairs_hook=OrderedDict)
|
||||
|
||||
@app.route('/index', methods = ['GET'])
|
||||
def index():
|
||||
@ -114,6 +101,37 @@ def indexfn(fn):
|
||||
return "ok"
|
||||
return "-"
|
||||
|
||||
def _parse_regex_field(raw):
    """Decode the ``reg`` form field into a list, or return None if invalid.

    Accepts a JSON array as well as a Python list literal (``['kw']``).
    """
    import ast

    try:
        value = json.loads(raw)
    except ValueError:
        try:
            # literal_eval only evaluates literals; it does not execute code.
            value = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return None
    return value if isinstance(value, list) else None


@app.route('/index-master', methods = ['GET', 'POST'])
def indexmaster():
    """Show the master index (GET) or receive an edit request (POST).

    POST expects the form fields ``action`` (``regex``, ``collate`` or
    ``delete``) and ``kw``, plus ``reg`` or ``col`` depending on the action.
    The actions currently only log and echo the request; nothing is written.
    """
    if request.method == 'GET':
        data = read_index_master(config.index['path'], config.index['master'])
        # A missing file is signalled by None or False (never by an empty
        # index).
        if data is not None and data is not False:
            return render_template("indx.master.html", fn="INDEX [MASTER]", master=data)
        else:
            # Added the missing space between the path and the message.
            return "File: " + os.path.join(config.index['path'], config.index['master']) + " does not exist."
    elif request.method == 'POST':
        data = request.form
        a = data.get('action')
        # Default to '' so a missing field cannot break the string
        # concatenations below with a TypeError.
        kw = data.get('kw', '')
        logging.debug("POST action: {}".format(a))
        if a == "regex":
            raw_reg = data.get('reg', '')
            logging.info("POST REGEX " + kw + " ++ " + raw_reg)
            # The original stripped every single quote before json.loads,
            # which turned ['kw'] into the invalid [kw] and made each request
            # fail with a 500.
            reg = _parse_regex_field(raw_reg)
            if reg is None:
                return "Invalid regex list: " + raw_reg, 400
            return "POST REGEX " + kw + " ++ " + raw_reg
        elif a == "collate":
            col = data.get('col', '')
            logging.info("POST COLLATE " + kw + " ++ " + col)
            return "POST COLLATE " + kw + " ++ " + col
        elif a == "delete":
            logging.info("POST DELETE " + kw)
            return "POST DELETE " + kw
    return "-"
|
||||
|
||||
|
||||
'''
|
||||
XML
|
||||
|
||||
12
www/static/indx.master.js
Normal file
12
www/static/indx.master.js
Normal file
@ -0,0 +1,12 @@
|
||||
// Wire the "+" / "-" buttons of every master-index entry: a click posts the
// button's action together with the entry's keyword and both text inputs to
// /index-master, and reloads the page when the server answers "ok".
$(function () {
    $('.delete, .regex, .collate').on('click', function (e) {
        var $entry = $(this).parent("li");
        var payload = {
            // the button's class attribute doubles as the action name
            'action': $(this).attr('class'),
            'kw': $entry.data("kw"),
            'reg': $entry.children(".regex_input")[0].value,
            'col': $entry.children(".collate_input")[0].value
        };
        $.post('/index-master', payload, function (response) {
            if (response === 'ok') {
                location.reload();
            }
        });
    });
});
|
||||
28
www/templates/indx.master.html
Normal file
28
www/templates/indx.master.html
Normal file
@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
{# Master index page. Expects `fn` (page title) and `master`, a mapping of
   keyword -> object with a `regex` list and an `indx` list. #}
<html>
<head>
	<meta charset="utf-8">
	<title>{{fn}}</title>
	<script type="text/javascript" src="{{ url_for('static',filename='jquery-3.2.1.min.js') }}" charset="utf-8"></script>
	<script type="text/javascript" src="{{ url_for('static',filename='indx.master.js') }}"></script>
</head>
<body>
	<h1>{{fn}}</h1>
	<div id="all">
		<h2>Selected</h2>
		<ul>
		{% for kw, s in master.items() %}
			{# The regex list is emitted as JSON so the server can parse what
			   is posted back; plain interpolation rendered a Python repr
			   (['kw']), which is not valid JSON. tojson output is safe inside
			   a single-quoted attribute. #}
			<li data-kw="{{kw}}" data-list="master">{{kw}} - <input type="text" class="regex_input" value='{{ s.regex|tojson }}'> <button class="regex">+</button> - <input type="text" class="collate_input" value=""> <button class="collate">+</button> - <button class="delete">-</button>
				<ul>
				{% for i in s.indx %}
					<li>{{i}}</li>
				{% endfor %}
				</ul>
			</li>
		{% endfor %}
		</ul>
		<hr>
		<hr>
		<hr>
	</div>
</body>
</html>
|
||||
@ -5738,7 +5738,7 @@ Ted
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>25.2</nbr>
|
||||
<nbr>25.3</nbr>
|
||||
<subject>Re: <nettime> The Piran Nettime Manifesto</subject>
|
||||
<from>Mark Stahlman (via RadioMail)</from>
|
||||
<to>nettime-l@desk.nl</to>
|
||||
|
||||
172
xml/16.NN.xml
172
xml/16.NN.xml
@ -84,7 +84,7 @@ anywhere in the world.
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>1.0</nbr>
|
||||
<nbr>0.1</nbr>
|
||||
<subject><nettime> Re: Concerned about terrorism in Kosovo</subject>
|
||||
<from>Aleksandar & Branka Davic</from>
|
||||
<to>nettime-l@desk.nl</to>
|
||||
@ -135,6 +135,89 @@ albanians, as well as jews, hungarians, romanian, slowaks, roma, croats,
|
||||
russians, bulgarians.. . i wonder what they would have to say to your
|
||||
letter...
|
||||
|
||||
---
|
||||
# distributed via nettime-l : no commercial use without permission
|
||||
# <nettime> is a closed moderated mailinglist for net criticism,
|
||||
# collaborative text filtering and cultural politics of the nets
|
||||
# more info: majordomo {AT} icf.de and "info nettime" in the msg body
|
||||
# URL: http://www.desk.nl/~nettime/ contact: nettime-owner {AT} icf.de
|
||||
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>0.2</nbr>
|
||||
<subject>Re: <nettime> Re: Concerned about terrorism in Kosovo</subject>
|
||||
<from>rvdbri</from>
|
||||
<to>nettime-l@desk.nl</to>
|
||||
<date>Wed, 04 Mar 1998 10:45:33 +0100</date>
|
||||
<content>
|
||||
Aart wrote:
|
||||
|
||||
|
||||
>OK Richard van den Brink,
|
||||
>
|
||||
>I've seen and read the report by Albanian secessionist magazine (all
|
||||
>articles unsigned) which You have recommended to me.
|
||||
>Well, first those photos. What do You think that Serbian four policemen
|
||||
>who were killed by Kosova Liberation Army looked less massacred than
|
||||
>these Albanians on photos?
|
||||
|
||||
No reason trying to outbid with other pictures or facts. What is
|
||||
disquieting me is that the nettime list is used for an outspoken political
|
||||
point of (Servian) view on the conflict in Kosovo. For me it's ok that this
|
||||
kind of opinions are existing, but I don't need to read those on a list as
|
||||
nettime. In the same way I wouldn't propagandize to read information on
|
||||
certain sites I was recommending as a kind of counter balance.
|
||||
|
||||
It would be possible to start a discussion about the Kosovo conflict here,
|
||||
but I don't think its the propper place. Therefore I suggest - if I am
|
||||
right - to modorate the list in a better way.
|
||||
|
||||
|
||||
Richard
|
||||
|
||||
---
|
||||
# distributed via nettime-l : no commercial use without permission
|
||||
# <nettime> is a closed moderated mailinglist for net criticism,
|
||||
# collaborative text filtering and cultural politics of the nets
|
||||
# more info: majordomo {AT} icf.de and "info nettime" in the msg body
|
||||
# URL: http://www.desk.nl/~nettime/ contact: nettime-owner {AT} icf.de
|
||||
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>0.3</nbr>
|
||||
<subject><nettime> Re: Concerned about terrorism in Kosovo</subject>
|
||||
<from>antiorp</from>
|
||||
<to>nettime-l@desk.nl</to>
|
||||
<date>Sun, 8 Mar 1998 05:54:02 -0600</date>
|
||||
<content>
|
||||
>We in Yugoslavia are very concerned about the increasing of terrorist
|
||||
|
||||
serb!an akt!v!t!ez.
|
||||
|
||||
|
||||
http://www.god-emil.dk/=cw4t7abs/0f0003/produkter/film+video.html
|
||||
|
||||
kl!k 0
|
||||
|
||||
|
||||
|
||||
am -3r! kk a 5 u g3 r _||-
|
||||
|
||||
|
||||
|
||||
> Serb military and paramilitary forces attacked today 14 villages in Kosove.
|
||||
> 95++ d ea d.
|
||||
|
||||
|
||||
|
||||
www.albanian.com
|
||||
www.koha.net
|
||||
www.dardania.com
|
||||
www.kosova.de
|
||||
|
||||
|
||||
---
|
||||
# distributed via nettime-l : no commercial use without permission
|
||||
# <nettime> is a closed moderated mailinglist for net criticism,
|
||||
@ -267,89 +350,6 @@ doesn't mention that.For some reason the README doesn't mention that.For
|
||||
some reason the README doesn't mention that.
|
||||
|
||||
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>1.1</nbr>
|
||||
<subject>Re: <nettime> Re: Concerned about terrorism in Kosovo</subject>
|
||||
<from>rvdbri</from>
|
||||
<to>nettime-l@desk.nl</to>
|
||||
<date>Wed, 04 Mar 1998 10:45:33 +0100</date>
|
||||
<content>
|
||||
Aart wrote:
|
||||
|
||||
|
||||
>OK Richard van den Brink,
|
||||
>
|
||||
>I've seen and read the report by Albanian secessionist magazine (all
|
||||
>articles unsigned) which You have recommended to me.
|
||||
>Well, first those photos. What do You think that Serbian four policemen
|
||||
>who were killed by Kosova Liberation Army looked less massacred than
|
||||
>these Albanians on photos?
|
||||
|
||||
No reason trying to outbid with other pictures or facts. What is
|
||||
disquieting me is that the nettime list is used for an outspoken political
|
||||
point of (Servian) view on the conflict in Kosovo. For me it's ok that this
|
||||
kind of opinions are existing, but I don't need to read those on a list as
|
||||
nettime. In the same way I wouldn't propagandize to read information on
|
||||
certain sites I was recommending as a kind of counter balance.
|
||||
|
||||
It would be possible to start a discussion about the Kosovo conflict here,
|
||||
but I don't think its the propper place. Therefore I suggest - if I am
|
||||
right - to modorate the list in a better way.
|
||||
|
||||
|
||||
Richard
|
||||
|
||||
---
|
||||
# distributed via nettime-l : no commercial use without permission
|
||||
# <nettime> is a closed moderated mailinglist for net criticism,
|
||||
# collaborative text filtering and cultural politics of the nets
|
||||
# more info: majordomo {AT} icf.de and "info nettime" in the msg body
|
||||
# URL: http://www.desk.nl/~nettime/ contact: nettime-owner {AT} icf.de
|
||||
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>1.2</nbr>
|
||||
<subject><nettime> Re: Concerned about terrorism in Kosovo</subject>
|
||||
<from>antiorp</from>
|
||||
<to>nettime-l@desk.nl</to>
|
||||
<date>Sun, 8 Mar 1998 05:54:02 -0600</date>
|
||||
<content>
|
||||
>We in Yugoslavia are very concerned about the increasing of terrorist
|
||||
|
||||
serb!an akt!v!t!ez.
|
||||
|
||||
|
||||
http://www.god-emil.dk/=cw4t7abs/0f0003/produkter/film+video.html
|
||||
|
||||
kl!k 0
|
||||
|
||||
|
||||
|
||||
am -3r! kk a 5 u g3 r _||-
|
||||
|
||||
|
||||
|
||||
> Serb military and paramilitary forces attacked today 14 villages in Kosove.
|
||||
> 95++ d ea d.
|
||||
|
||||
|
||||
|
||||
www.albanian.com
|
||||
www.koha.net
|
||||
www.dardania.com
|
||||
www.kosova.de
|
||||
|
||||
|
||||
---
|
||||
# distributed via nettime-l : no commercial use without permission
|
||||
# <nettime> is a closed moderated mailinglist for net criticism,
|
||||
# collaborative text filtering and cultural politics of the nets
|
||||
# more info: majordomo {AT} icf.de and "info nettime" in the msg body
|
||||
# URL: http://www.desk.nl/~nettime/ contact: nettime-owner {AT} icf.de
|
||||
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
@ -4849,7 +4849,7 @@ disciplines the mødel of civilization.
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>25.0</nbr>
|
||||
<nbr>24.1</nbr>
|
||||
<subject><nettime> Sender: owner-nettime-l {AT} bbs.thing.n</subject>
|
||||
<from>integer</from>
|
||||
<to>nettime-l@bbs.thing.net</to>
|
||||
@ -5377,7 +5377,7 @@ Netochka Nezvanova
|
||||
</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>27.0</nbr>
|
||||
<nbr>26.1</nbr>
|
||||
<subject><nettime> [ot] [!nt] \n2+0\</subject>
|
||||
<from>integer</from>
|
||||
<to>nettime-l@bbs.thing.net</to>
|
||||
|
||||
@ -531,7 +531,7 @@ ana</content>
|
||||
> SPECTRE list for media culture in All Europe</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>3.5</nbr>
|
||||
<nbr>3.5-p.114-2</nbr>
|
||||
<subject>[spectre] Deep Europe</subject>
|
||||
<from>Bruce Sterling</from>
|
||||
<to>spectre@mikrolisten.de</to>
|
||||
|
||||
@ -1808,7 +1808,7 @@ Molly</content>
|
||||
Please do!</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>4.3</nbr>
|
||||
<nbr>4.3-p.138-2</nbr>
|
||||
<subject>Re: <nettime> limits of networks...</subject>
|
||||
<from>David Garcia</from>
|
||||
<to>nettime-l@kein.org</to>
|
||||
@ -4373,7 +4373,7 @@ that.
|
||||
hey hauffeur step on the gas & run over the frog</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>7.12</nbr>
|
||||
<nbr>7.12-p.155-2</nbr>
|
||||
<subject>Re: <nettime> Network, Swarm, Microstructur</subject>
|
||||
<from>martha rosler</from>
|
||||
<to>nettime-l@bbs.thing.net</to>
|
||||
|
||||
@ -9604,52 +9604,6 @@ Trace projects http://trace.ntu.ac.uk/writers/sondheim/index.htm
|
||||
partial mirror at http://www.anu.edu.au/english/internet_txt</content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>24.0-p.471</nbr>
|
||||
<subject><nettime> what is going on, on nettime?</subject>
|
||||
<from>geert</from>
|
||||
<to>nettime-l@bbs.thing.net</to>
|
||||
<date>Tue, 17 Aug 2004 11:14:50 +0000</date>
|
||||
<content>
|
||||
Well... Alan, nettime being closed off because of lacking nn postings. Many
|
||||
will find a relief that such postings and related debates no longer happen, but
|
||||
that's perhaps a personal matter.
|
||||
|
||||
What might be true is the shift towards political economy, away from arts and
|
||||
culture.
|
||||
|
||||
The political economy (of new media) thread has been part of nettime from day
|
||||
one, at least in my understanding. And I am not sure that one can find these
|
||||
debate anywhere.
|
||||
|
||||
It could be the case that the international nettime list lack a common spirit
|
||||
and direction. Is that what you mean?
|
||||
|
||||
The question could be: what moves people these days? I think that's a more
|
||||
interesting--and urgent--question than the old issue of 'censoring' nn or mez.
|
||||
|
||||
Yours,
|
||||
|
||||
Geert
|
||||
|
||||
From: Alan Sondheim <sondheim {AT} panix.com>
|
||||
To: soft_skinned_space <empyre {AT} lists.cofa.unsw.edu.au>
|
||||
Subject: [-empyre-] what is going on, on nettime? (fwd)
|
||||
Date: Sat, 14 Aug 2004 22:20:22 -0400
|
||||
|
||||
nettime-l seems increasingly closed off; numerous voices aren't
|
||||
heard any more, for example nn, mez, Talan - I wrote them asking
|
||||
why the list is turning from cultural politics to more or less
|
||||
straight political economy, which can be found anywhere - the
|
||||
post was censored. Florian Cramer just stopped the Unstable
|
||||
Digest - there's no more codework there at all - he left his
|
||||
co-editors more or less in the lurch, not answering email, then
|
||||
disappearing, now back on nettime with politics. So that venue's
|
||||
gone and apparently at this point one can't even question the
|
||||
list direction onlist.
|
||||
|
||||
<...></content>
|
||||
</mail>
|
||||
<mail>
|
||||
<nbr>25.0</nbr>
|
||||
<subject><nettime> Re: on moderation and spams (several messages)</subject>
|
||||
<from>nettime's_digestive_system</from>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user