139 lines
3.9 KiB
Python
139 lines
3.9 KiB
Python
|
|
import pathlib, re, frontmatter, markdown, citeproc, json
|
||
|
|
from pyzotero import zotero
|
||
|
|
from html.parser import HTMLParser
|
||
|
|
import utils
|
||
|
|
|
||
|
|
# ANSI escape sequences used to colour terminal output.
CEND = '\33[0m'      # reset attributes
CRED = '\33[31m'
CGREEN = '\33[32m'
CVIOLET = '\33[35m'
CBLUE = '\33[34m'

# Pattern fragments: a non-greedy "<...>" tag, and any single character
# that is neither a word character nor whitespace.
re_html = '<.*?>'
re_punc = r'[^\w\s]'

# Compiled alternation of the two fragments above; substituting its matches
# with '' strips tags and punctuation from a string.
CLEANR = re.compile('|'.join((re_html, re_punc)))
|
||
|
|
|
||
|
|
def format_reading(title:str, desc:str):
    """Build the front-matter post for a reading.

    The post body is *desc*; its metadata carries ``title`` (the given
    *title*) and ``type`` (always ``'reading'``), set in that order.

    Returns the new ``frontmatter.Post``.
    """
    post = frontmatter.Post(content=desc)
    for key, value in (('title', title), ('type', 'reading')):
        post[key] = value
    return post
|
||
|
|
|
||
|
|
def compare_readings(path:pathlib.PosixPath, title:str, desc:str):
    """Load the reading stored at *path* and check it against *title*/*desc*.

    The stored post counts as equal only when its dictionary form, with the
    content stripped of surrounding whitespace, is exactly
    ``{'title': title, 'type': 'reading', 'content': desc.strip()}`` -- so
    any additional metadata key in the file makes the comparison fail.

    Returns a ``(post, is_equal)`` tuple, where ``post`` is the loaded
    ``frontmatter.Post``.
    """
    stored_post = frontmatter.load(path)

    stored = stored_post.to_dict()
    stored.update(content=stored['content'].strip())

    expected = dict(title=title, type='reading', content=desc.strip())
    return stored_post, stored == expected
|
||
|
|
|
||
|
|
def _confirm_update(field, old, new):
    """Show *old* and *new* for *field* and ask on stdin whether to update.

    Returns True only when the answer, stripped and lower-cased, is ``'y'``.
    """
    print(f"Update '{field}' (y/n)?\n\tprev: {CRED}{old}{CEND}\n\tnew: {CGREEN}{new}{CEND}")
    return input().strip().lower() == 'y'


def update_reading(previous:frontmatter.Post, new:frontmatter.Post):
    """Interactively merge *new* into *previous*, field by field.

    For every metadata key of *new* whose stringified, stripped value
    differs from the one in *previous* (or which *previous* lacks), the user
    is asked whether to take the new value. The content is handled the same
    way. Metadata keys that exist only in *previous* are left untouched:
    there is no new value to offer for them, and indexing *new* with such a
    key would raise ``KeyError``.

    *previous* is modified in place and also returned.
    """
    previous_keys = set(previous.keys())

    # Snapshot the keys so the loop is independent of the mapping view.
    for k in list(new.keys()):
        if k in previous_keys:
            old_value = previous[k]
            if str(old_value).strip() == str(new[k]).strip():
                continue
        else:
            # Key is new to the stored post; there is no previous value.
            old_value = None

        if _confirm_update(k, old_value, new[k]):
            previous[k] = new[k]

    if previous.content.strip() != new.content.strip():
        if _confirm_update('content', previous.content, new.content):
            previous.content = new.content

    return previous
|
||
|
|
|
||
|
|
# Lower-cased fragments; an entry whose title contains any of them is
# rejected. Extend this tuple to filter out more titles.
_EXCLUDED_TITLE_MARKERS = (
    "full text",
    "table of contents pdf",
    "submitted version",
    "includes bibliographical references",
)


def valid_bib_entry(csljson):
    """Return whether a CSL-JSON item should go into the bibliography.

    An item is rejected when its title contains, case-insensitively, one of
    the fragments in ``_EXCLUDED_TITLE_MARKERS``. An item without a string
    ``'title'`` is rejected as well instead of raising ``KeyError``.

    Args:
        csljson: mapping holding one CSL-JSON item.

    Returns:
        True if the item is usable, False otherwise.
    """
    title = csljson.get('title')
    if not isinstance(title, str):
        return False

    lowered = title.lower()
    return not any(marker in lowered for marker in _EXCLUDED_TITLE_MARKERS)
|
||
|
|
|
||
|
|
def format_filename_title(data_csl:dict, bib_entry:str):
    """Derive a markdown filename and a display title from a rendered entry.

    The item's title is searched case-insensitively inside *bib_entry*. The
    display title is *bib_entry* cut just after that match, extended by 4
    characters for items of type ``'book'`` and by 1 otherwise (presumably
    to take in the closing markup/punctuation that follows the title --
    confirm against the citation style in use).

    The filename is the display title with everything matched by ``CLEANR``
    removed, spaces turned into ``-`` and ``.md`` appended.

    Returns:
        ``(filename, title)``, or ``None`` when the item's title does not
        occur in *bib_entry*.
    """
    csl_title = data_csl['title']

    start = bib_entry.lower().find(csl_title.lower())
    if start == -1:
        return None

    if data_csl['type'] == 'book':
        trailing = 4
    else:
        trailing = 1
    cut = start + len(csl_title) + trailing

    heading = bib_entry[:cut]
    slug = CLEANR.sub('', heading).replace(' ', '-')

    return slug + ".md", heading
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Mirror every Zotero collection as per-reading markdown files.

    Reads the configuration through ``utils.load_conf()`` (keys used:
    ``zotero_group_id``, ``zotero_lib_type``, ``zotero_api_key``,
    ``template`` and ``content``). For each collection the items are fetched
    as CSL-JSON and rendered with the ``chicago-author-date.csl`` style
    found under ``<template>/csl``. One file per valid entry is then created
    or interactively updated at
    ``<content>/bibliography/<collection name>/<filename>``.
    """
    # Imported explicitly so the submodule is loaded regardless of what
    # `import citeproc` pulls in by itself.
    from citeproc.source.json import CiteProcJSON

    conf = utils.load_conf()

    z = zotero.Zotero(conf['zotero_group_id'], conf['zotero_lib_type'], conf['zotero_api_key'])

    for d in z.collections():
        collection_name = d['data']['name']
        collection_key = d['key']
        print("---")

        # Ask for CSL-JSON so the items can be handed to citeproc below.
        z.add_parameters(content='csljson')
        collection = z.collection_items(collection_key)
        # NOTE(review): items are processed in reverse of the order returned
        # by the API; the reason is not evident from this file.
        collection.reverse()

        # compile bib
        style_file = pathlib.Path(conf['template']) / 'csl' / 'chicago-author-date.csl'

        src = CiteProcJSON(json_data=collection)
        style = citeproc.CitationStylesStyle(style_file.absolute(), validate=False)
        bib = citeproc.CitationStylesBibliography(style=style, source=src, formatter=citeproc.formatter.html)

        entries = [citeproc.CitationItem(e['id']) for e in collection if valid_bib_entry(e)]
        bib.register(citeproc.Citation(entries))

        # Pair each registered key with its rendered entry by position.
        # NOTE(review): assumes bib.keys and the rendered list are aligned
        # one-to-one -- confirm against citeproc-py.
        rendered = [str(r) for r in bib.style.render_bibliography(entries)]
        kv = dict(zip(bib.keys, rendered))

        # process collection and bib
        for e in collection:
            eid = e['id'].lower()

            # Items filtered out above (or without a rendered entry) have no
            # mapping and are skipped.
            bib_entry = kv.get(eid)
            if bib_entry is None:
                continue

            # format_filename_title() returns None when the item's title is
            # not found in the rendered entry; unpacking that directly would
            # abort the whole run with a TypeError.
            formatted = format_filename_title(e, bib_entry)
            if formatted is None:
                print(f"{CRED}could not find the title of '{eid}' in its bibliography entry... skipping{CEND}")
                continue
            filename, title = formatted

            filepath = pathlib.Path(conf['content']) / "bibliography" / collection_name / filename

            if not filepath.exists():
                print(f"new reading: {title}")
                new = format_reading(title=title, desc=bib_entry)
                utils.save_file(filepath, frontmatter.dumps(new), mkdirs=True)
                continue

            prev, eq = compare_readings(filepath, title, bib_entry)
            if eq:
                print(f"reading {CVIOLET}{title}{CEND} already exists... continuing")
                continue

            print(f"updating reading: {CBLUE}{title}{CEND}")

            # Selective update: the user confirms each changed field.
            new = format_reading(title=title, desc=bib_entry)
            updated = update_reading(prev, new)
            utils.save_file(filepath, frontmatter.dumps(updated), overwrite=True)

            # NOTE(review): this prints the whole CSL item; {title} may have
            # been intended.
            print(f"reading {e} updated")


if __name__ == "__main__":
    main()
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|