139 lines
3.9 KiB
Python
Raw Normal View History

2025-09-21 06:54:25 +02:00
import pathlib, re, frontmatter, markdown, citeproc, json
from pyzotero import zotero
from html.parser import HTMLParser
import utils
# ANSI terminal escape sequences ('\33' is the octal escape for ESC) used to
# colour the console messages printed by this script.
CEND = '\33[0m'  # reset to the default style
CRED = '\33[31m'
CGREEN = '\33[32m'
CVIOLET = '\33[35m'
CBLUE = '\33[34m'
# Pattern fragments: anything that looks like an HTML tag (non-greedy), and
# any single character that is neither a word character nor whitespace.
re_html = '<.*?>'
re_punc = r'[^\w\s]'
# Combined pattern; format_filename_title() removes every match from a title
# before turning it into a filename.
CLEANR = re.compile(f'{re_html}|{re_punc}')
def format_reading(title:str, desc:str):
    """Build a frontmatter post describing one reading.

    `desc` becomes the body of the post; `title` and the fixed type
    'reading' are stored as metadata, in that order.
    """
    post = frontmatter.Post(content=desc)
    for field, value in (('title', title), ('type', 'reading')):
        post[field] = value
    return post
def compare_readings(path:pathlib.PosixPath, title:str, desc:str):
    """Load the reading stored at `path` and compare it with `title`/`desc`.

    Returns a tuple `(post, same)` where `post` is the loaded frontmatter
    post and `same` is True only when the post's dict form (with its content
    stripped) consists of exactly this title, the type 'reading' and the
    stripped `desc`.
    """
    stored = frontmatter.load(path)
    snapshot = stored.to_dict()
    snapshot['content'] = snapshot['content'].strip()
    expected = {'title': title, 'type': 'reading', 'content': desc.strip()}
    return stored, snapshot == expected
def update_reading(previous:"frontmatter.Post", new:"frontmatter.Post"):
    """Interactively merge `new` into `previous` and return `previous`.

    Every metadata key of `new` is compared with `previous` on the stripped
    string form of the values. When they differ, or when `previous` does not
    have the key at all, the user is asked on stdin whether to take the new
    value; only the answer 'y' applies it. The content is handled the same
    way afterwards.

    Keys that exist only in `previous` are left untouched: `new` carries
    nothing to replace them with, and indexing `new` with such a key would
    raise KeyError.
    """
    existing_keys = set(previous.keys())
    # Sorted (by string form, so mixed key types cannot break the sort) to
    # make the order of the prompts deterministic.
    for key in sorted(new.keys(), key=str):
        incoming = new[key]
        if key in existing_keys:
            current = previous[key]
            if str(current).strip() == str(incoming).strip():
                continue
        else:
            # Key is new to this post; show None as the previous value.
            current = None
        print(f"Update '{key}' (y/n)?\n\tprev: {CRED}{current}{CEND}\n\tnew: {CGREEN}{incoming}{CEND}")
        if input() == 'y':
            previous[key] = incoming

    if previous.content.strip() != new.content.strip():
        print(f"Update 'content' (y/n)?\n\tprev: {CRED}{previous.content}{CEND}\n\tnew: {CGREEN}{new.content}{CEND}")
        if input() == 'y':
            previous.content = new.content
    return previous
def valid_bib_entry(csljson):
    """Tell whether a CSL-JSON item should appear in the bibliography.

    An item is rejected when its title contains, case-insensitively, any of
    the marker phrases listed below; every other item is accepted.
    """
    lowered_title = csljson['title'].lower()
    rejected_markers = (
        "full text",
        "Table of Contents PDF",
        "Submitted Version",
        "Includes Bibliographical References",
        # add more...
    )
    return not any(marker.lower() in lowered_title for marker in rejected_markers)
def format_filename_title(data_csl:dict, bib_entry:str):
    """Derive a markdown filename and a display title from a rendered entry.

    The CSL title is searched for, case-insensitively, inside `bib_entry`.
    When it is not found the function returns None. Otherwise the display
    title is `bib_entry` cut just after the title, and the filename is that
    display title with every CLEANR match removed, spaces replaced by
    hyphens and '.md' appended. Returns `(filename, title)`.
    """
    csl_title = data_csl['title']
    start = bib_entry.lower().find(csl_title.lower())
    if start == -1:
        return None
    # Books keep 4 characters after the title, all other types keep 1.
    # NOTE(review): presumably this covers the closing markup/punctuation
    # that follows the title in the rendered entry - confirm.
    trailing = 4 if data_csl['type'] == 'book' else 1
    end = start + len(csl_title) + trailing
    title = bib_entry[:end]
    filename = CLEANR.sub('', title).replace(' ', '-') + ".md"
    return filename, title
# Script entry point: for every Zotero collection, render its items as a
# bibliography and create or interactively update one markdown "reading"
# file per item under <content>/bibliography/<collection name>/.
if __name__ == "__main__":
    conf = utils.load_conf()
    z = zotero.Zotero(conf['zotero_group_id'], conf['zotero_lib_type'], conf['zotero_api_key'])
    # The style file does not depend on the collection; resolve its path once.
    style_file = pathlib.Path(conf['template']) / 'csl' / 'chicago-author-date.csl'

    for d in z.collections():
        collection_name = d['data']['name']
        collection_key = d['key']
        print("---")

        z.add_parameters(content='csljson')
        collection = z.collection_items(collection_key)
        collection.reverse()

        # compile bib
        src = citeproc.source.json.CiteProcJSON(json_data=collection)
        style = citeproc.CitationStylesStyle(style_file.absolute(), validate=False)
        bib = citeproc.CitationStylesBibliography(style=style, source=src, formatter=citeproc.formatter.html)
        entries = [citeproc.CitationItem(e['id']) for e in collection if valid_bib_entry(e)]
        bib.register(citeproc.Citation(entries))

        # Pair each registered key with its rendered entry by position
        # (assumes bib.keys and the rendered bibliography share one order).
        kv = dict(zip(bib.keys, [str(e) for e in bib.style.render_bibliography(entries)]))

        # process collection and bib
        for e in collection:
            # Items filtered out by valid_bib_entry() were never registered
            # and therefore have no rendered entry.
            bib_entry = kv.get(e['id'].lower())
            if bib_entry is None:
                continue

            # format_filename_title() returns None when the item's title
            # cannot be located in the rendered entry; skip that item instead
            # of failing on the tuple unpacking and aborting the whole run.
            named = format_filename_title(e, bib_entry)
            if named is None:
                print(f"{CRED}title of '{e['id']}' not found in its bibliography entry... skipping{CEND}")
                continue
            filename, title = named

            filepath = pathlib.Path(conf['content']) / "bibliography" / collection_name / filename
            if not filepath.exists():
                print(f"new reading: {title}")
                new = format_reading(title=title, desc=bib_entry)
                utils.save_file(filepath, frontmatter.dumps(new), mkdirs=True)
            else:
                prev, eq = compare_readings(filepath, title, bib_entry)
                if eq:
                    print(f"reading {CVIOLET}{title}{CEND} already exists... continuing")
                    continue
                print(f"updating reading: {CBLUE}{title}{CEND}")
                ## selective update
                new = format_reading(title=title, desc=bib_entry)
                updated = update_reading(prev, new)
                utils.save_file(filepath, frontmatter.dumps(updated), overwrite=True)
                print(f"reading {e} updated")