From e0ab850067bd020d4e8822b04b0192cfe08a39f1 Mon Sep 17 00:00:00 2001
From: gauthiier
Date: Sat, 21 Dec 2019 15:58:22 +0100
Subject: [PATCH] new index command

---
 crawl.py |  6 +++---
 index.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 index.py

diff --git a/crawl.py b/crawl.py
index 4dc0652..07cf84a 100644
--- a/crawl.py
+++ b/crawl.py
@@ -1,5 +1,5 @@
 import sys, logging, argparse
-import lists.crawl
+import lists.crawl, config
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -24,8 +24,8 @@ if __name__ == "__main__":
 
     p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
     p.add_argument('url', metavar="url", help="mailinglist urls to archive", nargs="+")
-    p.add_argument('--names', help="mailinglists' names", nargs="+")
-    p.add_argument('--arch', help="path to archives directory (default='archives')", default="archives")
+    p.add_argument('--names', '-n', help="mailinglists' names", nargs="+")
+    p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)
 
     args = p.parse_args()
 
diff --git a/index.py b/index.py
new file mode 100644
index 0000000..5be515f
--- /dev/null
+++ b/index.py
@@ -0,0 +1,46 @@
+import os, logging, argparse
+from glob import glob
+import archive.archive as archive
+import config
+
+logging.basicConfig(level=logging.DEBUG)
+
+def list_archives(archives_dir):
+    """Return the names of the directories directly inside archives_dir."""
+    return [d for d in os.listdir(archives_dir) if os.path.isdir(os.path.join(archives_dir, d))]
+
+def run(lists, archives):
+    """Build an Archive for each name in lists and call insert_db on it.
+
+    lists is the collection of list names to index; archives is the
+    directory path passed to every Archive as archive_dir. Connection
+    settings (host, database, user, password) are read from config.
+    """
+    logging.debug("indexing: " + str(lists) + " from " + archives)
+
+    # Iterate over the requested names (not the characters of the archives
+    # path) and bind the instance to its own local name: reusing `archive`
+    # would shadow the module and raise UnboundLocalError.
+    for name in lists:
+        arch = archive.Archive(name, archive_dir=archives)
+        arch.insert_db(host=config.host, database=config.database, user=config.user, password=config.password)
+
+if __name__ == "__main__":
+
+    p = argparse.ArgumentParser(description='Mailinglists are dead. Long live mailinglists!')
+    p.add_argument('list', metavar="list", help="list(s) to index", nargs="+")
+    p.add_argument('--archives', '-a', help="path to archives directory (default='archives')", default=config.archives)
+
+    args = p.parse_args()
+
+    # Fall back to the configured directory when --archives is empty.
+    if not args.archives:
+        args.archives = config.archives
+
+    # The single name "all" selects every directory found under archives.
+    if len(args.list) == 1 and args.list[0] == "all":
+        args.list = list_archives(args.archives)
+
+    run(args.list, args.archives)
+
+