new index command

This commit is contained in:
gauthiier 2019-12-21 15:58:22 +01:00
parent e560cb4cdb
commit e0ab850067
2 changed files with 37 additions and 3 deletions

View File

@@ -1,5 +1,5 @@
import sys, logging, argparse import sys, logging, argparse
import lists.crawl import lists.crawl, config
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
@@ -24,8 +24,8 @@ if __name__ == "__main__":
p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!') p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
p.add_argument('url', metavar="url", help="mailinglist urls to archive", nargs="+") p.add_argument('url', metavar="url", help="mailinglist urls to archive", nargs="+")
p.add_argument('--names', help="mailinglists' names", nargs="+") p.add_argument('--names', '-n', help="mailinglists' names", nargs="+")
p.add_argument('--arch', help="path to archives directory (default='archives')", default="archives") p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)
args = p.parse_args() args = p.parse_args()

34
index.py Normal file
View File

@@ -0,0 +1,34 @@
import os, logging, argparse
from glob import glob
import archive.archive as archive
import config
# Configure the root logger at import time so that DEBUG-level messages
# (such as the "indexing: ..." trace emitted by run()) are shown.
logging.basicConfig(level=logging.DEBUG)
def list_archives(archives_dir):
    """Return the names of the sub-directories directly inside *archives_dir*.

    Plain files are ignored; only entries for which ``os.path.isdir`` is true
    are kept. The script entry point uses these names as the lists to index
    when ``all`` is requested.

    The names are returned sorted: ``os.listdir`` yields entries in arbitrary
    order, and a stable order keeps indexing runs reproducible.

    Parameters:
        archives_dir: path of the directory to scan.

    Returns:
        A sorted list of sub-directory names (not full paths).

    Raises:
        OSError: propagated from ``os.listdir`` when *archives_dir* cannot be
            listed (for instance, it does not exist).
    """
    return sorted(
        name
        for name in os.listdir(archives_dir)
        if os.path.isdir(os.path.join(archives_dir, name))
    )
def run(lists, archives):
    """Insert each named mailing-list archive into the configured database.

    Parameters:
        lists: iterable of list names; one ``archive.Archive`` is built per
            name.
        archives: path of the archives directory, forwarded to ``Archive`` as
            ``archive_dir``.

    Connection settings (host, database, user, password) are read from the
    ``config`` module. Any exception raised by ``Archive`` or ``insert_db``
    propagates to the caller.
    """
    logging.debug("indexing: %s from %s", lists, archives)
    # Iterate over the requested list names; `archives` is the directory
    # path, so looping over it would walk the path character by character.
    for list_name in lists:
        # Use a distinct local name: assigning to `archive` inside this
        # function would make it a local variable and the module lookup
        # `archive.Archive` would raise UnboundLocalError.
        list_archive = archive.Archive(list_name, archive_dir=archives)
        list_archive.insert_db(
            host=config.host,
            database=config.database,
            user=config.user,
            password=config.password,
        )
if __name__ == "__main__":
    # Command-line entry point: parse the list names and the archives
    # directory, expand the special name "all", then index.
    parser = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
    parser.add_argument('list', metavar="list", help="list(s) to index", nargs="+")
    parser.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)
    args = parser.parse_args()

    # An empty value (e.g. `-a ""`) falls back to the configured directory.
    archives_dir = args.archives if args.archives else config.archives

    # A lone "all" means every sub-directory of the archives directory.
    if args.list == ["all"]:
        targets = list_archives(archives_dir)
    else:
        targets = args.list

    run(targets, archives_dir)