From 959f281a1cd21cc6b1ba6f6f65dcb3222bb42666 Mon Sep 17 00:00:00 2001 From: gauthiier Date: Mon, 6 Dec 2021 17:43:33 +0100 Subject: [PATCH] new pull command logic from etherpump --- pppadump/commands/pull.py | 64 +++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/pppadump/commands/pull.py b/pppadump/commands/pull.py index d091c9b..59a7705 100644 --- a/pppadump/commands/pull.py +++ b/pppadump/commands/pull.py @@ -63,6 +63,15 @@ def main (args): p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__") + # > FROM ETHERPUMP + p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__") + + # > FROM ETHERPUMP + p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`") + + # > FROM ETHERPUMP + p.add_argument("--magicwords", default=False, action="store_true", help="download html to PADID.magicwords.html") + args = p.parse_args(args) print ("pppadump version {}".format(VERSION), file=sys.stderr) @@ -167,23 +176,42 @@ def main (args): except OSError: pass + # > following etherpump's logic in matching text and magicwords (without async) + text = getjson(info['localapiurl']+'getText?'+urlencode(data)) - ########################################## - ## ENFORCE __NOPUBLISH__ MAGIC WORD - ########################################## - if args.nopublish and args.nopublish in text['data']['text']: - # TRY TO PURGE ANY EXISTING DOCS - print ("NOPUBLISH!", file=sys.stderr) - try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) + + if text["code"] != 0: + print("API ERROR: {} {}".format(padid, text["message"])) continue - if args.all or args.text: - ver = {"type": "text"} - versions.append(ver) - ver["code"] = text["_code"] - if text["_code"] == 200: - text = text['data']['text'] + ver = {"type": "text"} + versions.append(ver) + ver["code"] = text["_code"] + if text["_code"] == 200: + + text = text['data']['text'] + + ########################################## + ## ENFORCE __NOPUBLISH__ MAGIC WORD + ########################################## + if args.nopublish and args.nopublish in text: + # TRY TO PURGE ANY EXISTING DOCS + print ("{} deleted __NOPUBLISH__".format(padid), file=sys.stderr) + try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) + continue + + ########################################## + ## ENFORCE __PUBLISH__ MAGIC WORD + ########################################## + if args.publish_opt_in and args.publish not in text: + # TRY TO PURGE ANY EXISTING DOCS + print ("{} deleted __PUBLISH__ opt out".format(padid), file=sys.stderr) + try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json")) + continue + + + if args.all or args.text: ver["path"] = p+raw_ext ver["url"] = quote(ver["path"]) with open(ver["path"], "w") as f: @@ -191,6 +219,16 @@ def main (args): # once the content is settled, compute a hash # and link it in the metadata! + + ########################################## + # INCLUDE __XXX__ MAGIC WORDS + ########################################## + if args.all or args.magicwords: + pattern = r"__[a-zA-Z0-9]+?__" + all_matches = re.findall(pattern, text) + magic_words = list(set(all_matches)) + if magic_words: + meta["magicwords"] = magic_words links = [] if args.css: links.append({"href":args.css, "rel":"stylesheet"})