"""Mirror Zotero collections as front-matter "reading" files.

For every collection in the configured Zotero library, the script renders a
bibliography with citeproc-py and writes one Markdown file per valid entry
under ``<content>/bibliography/<collection name>/``.  Files that already
exist are compared with the freshly rendered entry and, when they differ,
updated interactively field by field.
"""

import json
import pathlib
import re
from html.parser import HTMLParser
from typing import Optional, Tuple

import citeproc
# Explicit submodule import: the script dereferences
# ``citeproc.source.json.CiteProcJSON`` below, so make sure it is loaded.
import citeproc.source.json
import frontmatter
import markdown
from pyzotero import zotero

import utils

# ANSI colour escape sequences used in console output.
CEND = '\33[0m'
CRED = '\33[31m'
CGREEN = '\33[32m'
CVIOLET = '\33[35m'
CBLUE = '\33[34m'

# Matches HTML tags and any character that is neither a word character nor
# whitespace; used to derive filenames from rendered bibliography entries.
re_html = '<.*?>'
re_punc = r'[^\w\s]'
CLEANR = re.compile(f'{re_html}|{re_punc}')

# Lower-case title fragments that disqualify a CSL item (see valid_bib_entry).
# add more...
_INVALID_TITLE_MARKERS = (
    "full text",
    "table of contents pdf",
    "submitted version",
    "includes bibliographical references",
)


def format_reading(title: str, desc: str):
    """Build a front-matter post of type ``'reading'``.

    Args:
        title: Value stored under the ``title`` metadata key.
        desc: Body (content) of the post.

    Returns:
        A ``frontmatter.Post`` with ``title`` and ``type`` metadata set.
    """
    post = frontmatter.Post(content=desc)
    post['title'] = title
    post['type'] = 'reading'
    return post


def compare_readings(path: pathlib.Path, title: str, desc: str):
    """Load the reading stored at *path* and compare it with the given values.

    Args:
        path: Location of an existing front-matter file.
        title: Expected title.
        desc: Expected content; surrounding whitespace is ignored.

    Returns:
        ``(post, equal)`` where *post* is the loaded ``frontmatter.Post`` and
        *equal* is True only if the file holds exactly ``title``, ``type ==
        'reading'`` and the same stripped content (any extra metadata key
        makes it False).
    """
    post = frontmatter.load(path)
    stored = post.to_dict()
    stored['content'] = stored['content'].strip()
    expected = {'title': title, 'type': 'reading', 'content': desc.strip()}
    return post, stored == expected


def _confirm_update(label, old, new):
    """Show the old and new value of *label* and return True if the user types 'y'."""
    print(f"Update '{label}' (y/n)?\n\tprev: {CRED}{old}{CEND}\n\tnew: {CGREEN}{new}{CEND}")
    return input().strip().lower() == 'y'


def update_reading(previous: frontmatter.Post, new: frontmatter.Post):
    """Interactively merge *new* into *previous*.

    Each metadata key of *new* whose stringified, stripped value differs from
    the one in *previous* is offered for update, and so is the content.
    Metadata keys that exist only in *previous* are left untouched, so
    hand-added fields survive a sync.

    Args:
        previous: The post currently on disk; modified in place.
        new: The freshly generated post.

    Returns:
        *previous*, after applying the updates the user accepted.
    """
    new_keys = set(new.keys())
    # Sorted so the prompts appear in a stable order from run to run.
    for key in sorted(set(previous.keys()) | new_keys):
        if key not in new_keys:
            continue
        # .get(): the key may be absent from `previous`; indexing would raise.
        old_value = previous.get(key)
        new_value = new.get(key)
        if str(old_value).strip() == str(new_value).strip():
            continue
        if _confirm_update(key, old_value, new_value):
            previous[key] = new_value

    if previous.content.strip() != new.content.strip():
        if _confirm_update('content', previous.content, new.content):
            previous.content = new.content
    return previous


def valid_bib_entry(csljson):
    """Return True if a CSL-JSON item should appear in the bibliography.

    Items are rejected when they have no (or an empty) ``title`` -- the title
    is needed later to derive the filename -- or when the title contains one
    of the ``_INVALID_TITLE_MARKERS`` fragments (case-insensitive).
    """
    title = csljson.get('title')
    if not title:
        return False
    lowered = title.lower()
    return not any(marker in lowered for marker in _INVALID_TITLE_MARKERS)


def format_filename_title(data_csl: dict, bib_entry: str) -> Optional[Tuple[str, str]]:
    """Derive a filename and display title from a rendered bibliography entry.

    The entry is cut shortly after the end of the item's title (searched
    case-insensitively): 4 characters past it for books, 1 otherwise --
    presumably to keep the closing markup/punctuation that follows the title
    (TODO confirm against the CSL style's output).

    Args:
        data_csl: CSL-JSON item; ``title`` is required, ``type`` is optional.
        bib_entry: The rendered (HTML) bibliography entry for that item.

    Returns:
        ``(filename, title)`` where *title* is the truncated entry and
        *filename* is *title* with HTML tags and punctuation removed, spaces
        replaced by ``-`` and ``.md`` appended; or ``None`` if the item's
        title does not occur in *bib_entry*.
    """
    csl_title = data_csl['title']
    start = bib_entry.lower().find(csl_title.lower())
    if start == -1:
        return None
    trailing = 4 if data_csl.get('type') == 'book' else 1
    title = bib_entry[:start + len(csl_title) + trailing]
    filename = CLEANR.sub('', title).replace(' ', '-') + ".md"
    return filename, title


def main():
    """Sync every collection of the configured Zotero library to disk."""
    conf = utils.load_conf()
    z = zotero.Zotero(conf['zotero_group_id'], conf['zotero_lib_type'], conf['zotero_api_key'])

    # Loop-invariant paths.
    style_file = pathlib.Path(conf['template']) / 'csl' / 'chicago-author-date.csl'
    bibliography_root = pathlib.Path(conf['content']) / "bibliography"

    for d in z.collections():
        collection_name = d['data']['name']
        collection_key = d['key']
        print("---")

        # Ask for CSL-JSON; set again before every request, as the original did.
        z.add_parameters(content='csljson')
        collection = z.collection_items(collection_key)
        collection.reverse()

        # compile bib
        src = citeproc.source.json.CiteProcJSON(json_data=collection)
        style = citeproc.CitationStylesStyle(style_file.absolute(), validate=False)
        bib = citeproc.CitationStylesBibliography(style=style, source=src, formatter=citeproc.formatter.html)
        entries = [citeproc.CitationItem(e['id']) for e in collection if valid_bib_entry(e)]
        bib.register(citeproc.Citation(entries))

        # Pair each registered key with its rendered entry by position
        # (original author's note: citeproc-py gives no direct mapping here).
        rendered = [str(e) for e in bib.style.render_bibliography(entries)]
        kv = dict(zip(bib.keys, rendered))

        # process collection and bib
        for e in collection:
            eid = e['id'].lower()
            if eid not in bib.keys:
                continue

            bib_entry = kv[eid]
            formatted = format_filename_title(e, bib_entry)
            if formatted is None:
                # Title not found in the rendered entry: nothing to name the
                # file after, so skip instead of failing on tuple unpacking.
                print(f"{CRED}skipping {eid}: title not found in bibliography entry{CEND}")
                continue
            filename, title = formatted

            filepath = bibliography_root / collection_name / filename
            if not filepath.exists():
                print(f"new reading: {title}")
                new = format_reading(title=title, desc=bib_entry)
                utils.save_file(filepath, frontmatter.dumps(new), mkdirs=True)
                continue

            prev, eq = compare_readings(filepath, title, bib_entry)
            if eq:
                print(f"reading {CVIOLET}{title}{CEND} already exists... continuing")
                continue

            print(f"updating reading: {CBLUE}{title}{CEND}")
            ## selective update
            new = format_reading(title=title, desc=bib_entry)
            updated = update_reading(prev, new)
            utils.save_file(filepath, frontmatter.dumps(updated), overwrite=True)
            print(f"reading {e} updated")


if __name__ == "__main__":
    main()