new pull command logic from etherpump

This commit is contained in:
gauthiier 2021-12-06 17:43:33 +01:00
parent aa246254f4
commit 959f281a1c

View File

@@ -63,6 +63,15 @@ def main (args):
p.add_argument("--nopublish", default="__NOPUBLISH__", help="no publish magic word, default: __NOPUBLISH__")
# > FROM ETHERPUMP
p.add_argument("--publish", default="__PUBLISH__", help="the publish magic word, default: __PUBLISH__")
# > FROM ETHERPUMP
p.add_argument("--publish-opt-in", default=False, action="store_true", help="ensure `--publish` is honoured instead of `--nopublish`")
# > FROM ETHERPUMP
p.add_argument("--magicwords", default=False, action="store_true", help="download html to PADID.magicwords.html")
args = p.parse_args(args)
print ("pppadump version {}".format(VERSION), file=sys.stderr)
@@ -167,23 +176,42 @@ def main (args):
except OSError:
pass
# > following etherpump's logic in matching text and magicwords (without async)
text = getjson(info['localapiurl']+'getText?'+urlencode(data))
##########################################
## ENFORCE __NOPUBLISH__ MAGIC WORD if text["code"] != 0:
########################################## print("API ERROR: {} {}".format(padid, text["message"]))
if args.nopublish and args.nopublish in text['data']['text']:
# TRY TO PURGE ANY EXISTING DOCS
print ("NOPUBLISH!", file=sys.stderr)
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
continue continue
if args.all or args.text: ver = {"type": "text"}
ver = {"type": "text"} versions.append(ver)
versions.append(ver) ver["code"] = text["_code"]
ver["code"] = text["_code"]
if text["_code"] == 200:
text = text['data']['text']
if text["_code"] == 200:
text = text['data']['text']
##########################################
## ENFORCE __NOPUBLISH__ MAGIC WORD
##########################################
if args.nopublish and args.nopublish in text:
# TRY TO PURGE ANY EXISTING DOCS
print ("{} deleted __NOPUBLISH__".format(padid), file=sys.stderr)
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
continue
##########################################
## ENFORCE __PUBLISH__ MAGIC WORD
##########################################
if args.publish_opt_in and args.publish not in text:
# TRY TO PURGE ANY EXISTING DOCS
print ("{} deleted __PUBLISH__ opt out".format(padid), file=sys.stderr)
try_deleting((p+raw_ext,p+".raw.html",p+".diff.html",p+".meta.json"))
continue
if args.all or args.text:
ver["path"] = p+raw_ext
ver["url"] = quote(ver["path"])
with open(ver["path"], "w") as f:
@@ -191,6 +219,16 @@ def main (args):
# once the content is settled, compute a hash
# and link it in the metadata!
##########################################
# INCLUDE __XXX__ MAGIC WORDS
##########################################
if args.all or args.magicwords:
pattern = r"__[a-zA-Z0-9]+?__"
all_matches = re.findall(pattern, text)
magic_words = list(set(all_matches))
if magic_words:
meta["magicwords"] = magic_words
links = []
if args.css:
links.append({"href":args.css, "rel":"stylesheet"})