utf-8 and p[link] logic
This commit is contained in:
parent
00d7a537c7
commit
3d8dd1fbc1
@ -280,17 +280,18 @@ def main (args):
|
||||
except FileNotFoundError:
|
||||
p['text'] = ''
|
||||
|
||||
# ADD IN LINK TO PAD AS "link"
|
||||
for v in linkversions:
|
||||
if v in versions_by_type:
|
||||
vdata = versions_by_type[v]
|
||||
pref_for_link = ["html","pad", "text"] ## making a design decision here (otherwise need to explicitly provide --link in command)
|
||||
for x in pref_for_link:
|
||||
if x in versions_by_type:
|
||||
vdata = versions_by_type[x]
|
||||
try:
|
||||
if v == "pad":
|
||||
if v == "pad" or os.path.exists(vdata["path"]):
|
||||
p["link"] = absurl(vdata["url"], linkbase)
|
||||
break
|
||||
except KeyError as e:
|
||||
pass
|
||||
|
||||
|
||||
# Not sure this goes here but fixing relpaths with output is quite nice...
|
||||
if args.output:
|
||||
outpath = Path(args.output).parent
|
||||
@ -308,6 +309,13 @@ def main (args):
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
if not p["link"].startswith("http"):
|
||||
vpath = Path(p["link"])
|
||||
try:
|
||||
p["link"] = os.path.join(os.path.relpath(vpath.parent, outpath), vpath.name)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -260,9 +260,9 @@ def main (args):
|
||||
# doc = html5lib.parse(html, treebuilder="etree", override_encoding="utf-8", namespaceHTMLElements=False)
|
||||
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links, viewport_meta="width=device-width,initial-scale=1")
|
||||
with open(ver["path"], "w") as f:
|
||||
with open(ver["path"], "w", encoding="utf-8") as f:
|
||||
# f.write(html.encode("utf-8"))
|
||||
print(ET.tostring(doc, method="html", encoding="utf-8"), file=f)
|
||||
print(ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
except TypeError:
|
||||
# Malformed / incomplete response, record the message (such as "internal error") in the metadata and write NO file!
|
||||
ver["message"] = html["message"]
|
||||
@ -279,15 +279,11 @@ def main (args):
|
||||
html = html['data']['html']
|
||||
ver["path"] = p+".raw.html"
|
||||
ver["url"] = quote(ver["path"])
|
||||
# JUN 2016: changing to save REALLY the RAW / unchanged HTML from the API
|
||||
with open(ver["path"], "w") as f:
|
||||
print(html, file=f)
|
||||
|
||||
# doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
# html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links)
|
||||
# with open(ver["path"], "w") as f:
|
||||
# # f.write(html.encode("utf-8"))
|
||||
# print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
doc = html5lib.parse(html, treebuilder="etree", namespaceHTMLElements=False)
|
||||
html5tidy(doc, indent=True, title=padid, scripts=args.script, links=links, viewport_meta="width=device-width,initial-scale=1")
|
||||
with open(ver["path"], "w", encoding="utf-8") as f:
|
||||
# f.write(html.encode("utf-8"))
|
||||
print (ET.tostring(doc, method="html", encoding="unicode"), file=f)
|
||||
|
||||
# output meta
|
||||
if args.all or args.meta:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user