listservs/crawl.py

33 lines
957 B
Python
Raw Normal View History

2017-07-12 21:26:36 +02:00
import sys, logging, argparse
2019-12-21 15:58:22 +01:00
import lists.crawl, config
2017-07-12 21:26:36 +02:00
# Configure the root logger so that records of level DEBUG and above are emitted.
logging.basicConfig(level=logging.DEBUG)
def run(args):
    """Crawl every mailing-list url given on the command line, then exit.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``url`` (list of urls to crawl), ``names`` (optional list of
            names, matched to the urls by position) and ``archives`` (value
            forwarded to the crawler as ``archive_dir``).

    Raises:
        SystemExit: with the message ``'No url(s). Aborting.'`` when
            ``args.url`` is empty or ``None``; otherwise with no message
            once every url has been handed to the crawler.
    """
    if not args.url:
        sys.exit('No url(s). Aborting.')

    # ``--names`` is optional; treat a missing value as "no names supplied".
    names = args.names or []

    # NOTE: urls are not validated here; they are passed to the crawler as given.
    for index, url in enumerate(args.url):
        # A url without a positional counterpart in ``names`` is crawled unnamed.
        name = names[index] if index < len(names) else None
        # The same name is passed as ``sublist_name`` (original author's note:
        # sublist for nettime).
        lists.crawl.crawl(
            url=url,
            name=name,
            sublist_name=name,
            # The option is registered as ``--archives``, so the parsed
            # attribute is ``args.archives`` (``args.arch`` does not exist).
            archive_dir=args.archives,
        )

    sys.exit()
if __name__ == "__main__":
    # Command-line entry point: one or more positional urls, optional names
    # for them, and an optional archives directory taken from ``config`` by
    # default. The parsed namespace is handed straight to ``run``.
    parser = argparse.ArgumentParser(
        description='Mailinglists are dead. Long live mailinglists!',
    )
    parser.add_argument(
        'url',
        nargs="+",
        metavar="url",
        help="mailinglist urls to archive",
    )
    parser.add_argument(
        '--names', '-n',
        nargs="+",
        help="mailinglists' names",
    )
    parser.add_argument(
        '--archives', '-a',
        default=config.archives,
        help="path to archives directory (default='archives')",
    )
    run(parser.parse_args())