listservs/crawl.py

33 lines
928 B
Python
Raw Normal View History

2017-07-12 21:26:36 +02:00
import sys, logging, argparse
2017-07-14 10:54:56 +02:00
import lists.crawl
2017-07-12 21:26:36 +02:00
# Configure the root logger at import time so that DEBUG-level (and higher)
# messages are emitted.
logging.basicConfig(level=logging.DEBUG)
def run(args):
    """Crawl every mailing-list url found in ``args``, then exit the process.

    ``args`` is the parsed command-line namespace. This function reads:

    * ``args.url``   -- the mailing-list urls to crawl;
    * ``args.names`` -- optional names, matched to the urls by position;
    * ``args.arch``  -- the archives directory handed to the crawler.

    A url that has no name at the same position is crawled with
    ``name=None``. Names beyond the number of urls are ignored (a warning is
    logged).

    Raises:
        SystemExit: with an error message when no url is given, and with no
            argument once every url has been handed to the crawler.
    """
    if not args.url:
        sys.exit('No url(s). Aborting.')

    # Use a local list rather than overwriting ``args.names`` so the caller's
    # namespace is left untouched.
    names = args.names or []
    if len(names) > len(args.url):
        logging.warning(
            "%d name(s) given for %d url(s); surplus names are ignored.",
            len(names), len(args.url),
        )

    # NOTE: urls are deliberately not validated here.
    for index, url in enumerate(args.url):
        name = names[index] if index < len(names) else None
        # The same name is also passed as ``sublist_name`` (the original
        # author's note says this is for nettime).
        lists.crawl.crawl(url=url, name=name, sublist_name=name, archive_dir=args.arch)

    sys.exit()
if __name__ == "__main__":
    # Command-line entry point: build the argument parser, parse the command
    # line and hand the resulting namespace to run().
    parser = argparse.ArgumentParser(
        description='Mailinglists are dead. Long live mailinglists!'
    )
    parser.add_argument(
        'url',
        nargs="+",
        metavar="url",
        help="mailinglist urls to archive",
    )
    parser.add_argument(
        '--names',
        nargs="+",
        help="mailinglists' names",
    )
    parser.add_argument(
        '--arch',
        default="archives",
        help="path to archives directory (default='archives')",
    )
    run(parser.parse_args())