import pathlib, re, frontmatter, markdown, citeproc, json
from pyzotero import zotero
from html.parser import HTMLParser
import utils

# ANSI escape sequences used to colour the terminal output below.
CEND = '\33[0m'        # reset to the default colour
CRED = '\33[31m'       # previous value in an update prompt
CGREEN = '\33[32m'     # new value in an update prompt
CVIOLET = '\33[35m'    # reading that already exists unchanged
CBLUE   = '\33[34m'    # reading that is being updated

# CLEANR matches either an HTML tag or any single character that is neither
# a word character nor whitespace; it is used to turn a title into a filename.
re_html = '<.*?>'
re_punc = r'[^\w\s]'
CLEANR = re.compile('|'.join((re_html, re_punc)))

def format_reading(title:str, desc:str):
	"""Build the front-matter post for a reading.

	Returns a ``frontmatter.Post`` whose content is ``desc`` and whose
	metadata holds ``title`` and a ``type`` of ``'reading'``.
	"""
	# Post accepts metadata as keyword arguments after the content.
	return frontmatter.Post(desc, title=title, type='reading')

def compare_readings(path:pathlib.PosixPath, title:str, desc:str):
	"""Load the post stored at ``path`` and compare it to the expected reading.

	Returns a ``(post, is_equal)`` tuple. ``is_equal`` is True only when the
	post's ``to_dict()`` form, with its content stripped, is exactly the
	title, the ``'reading'`` type and the stripped ``desc`` -- any additional
	key in the loaded post makes the comparison fail.
	"""
	post = frontmatter.load(path)

	on_disk = post.to_dict()
	on_disk['content'] = on_disk['content'].strip()

	expected = {'title': title, 'type': 'reading', 'content': desc.strip()}
	return post, on_disk == expected

def update_reading(previous:frontmatter.Post, new:frontmatter.Post):
	"""Interactively merge ``new`` into ``previous`` and return ``previous``.

	For every metadata key of ``new`` whose stripped string form differs from
	the one in ``previous`` (or that ``previous`` lacks), the user is asked on
	stdin whether to take the new value; answering ``y`` copies it over. The
	content is handled the same way afterwards.

	Keys that exist only in ``previous`` are left untouched: ``new`` has no
	value to offer for them, and subscripting ``new`` with such a key would
	raise ``KeyError``.
	"""
	existing_keys = set(previous.keys())

	# Walk new's keys in their own order so the prompts appear in a
	# deterministic sequence (iterating a set would not guarantee that).
	for k in list(new.keys()):
		incoming = new[k]

		if k in existing_keys:
			current = previous[k]
			if str(current).strip() == str(incoming).strip():
				continue
		else:
			# key is new to the stored post; shown as None in the prompt
			current = None

		print(f"Update '{k}' (y/n)?\n\tprev: {CRED}{current}{CEND}\n\tnew: {CGREEN}{incoming}{CEND}")
		if input() == 'y':
			previous[k] = incoming

	if previous.content.strip() != new.content.strip():
		print(f"Update 'content' (y/n)?\n\tprev: {CRED}{previous.content}{CEND}\n\tnew: {CGREEN}{new.content}{CEND}")
		if input() == 'y':
			previous.content = new.content

	return previous

def valid_bib_entry(csljson):
	"""Return True when a CSL-JSON item should appear in the bibliography.

	An item is rejected when its title contains (case-insensitively) one of
	the marker phrases below, or when it has no string title at all -- such
	an item cannot be matched against its rendered entry by title.
	"""
	title = csljson.get('title')
	if not isinstance(title, str):
		return False

	# all markers are lowercase because they are matched against title.lower()
	markers = (
		"full text",
		"table of contents pdf",
		"submitted version",
		"includes bibliographical references",
		# add more...
	)

	lowered = title.lower()
	return not any(marker in lowered for marker in markers)

def format_filename_title(data_csl:dict, bib_entry:str):
	"""Derive a markdown filename and a display title from a rendered entry.

	The item's title is searched for, case-insensitively, inside
	``bib_entry``. If it is not found the function returns ``None``.
	Otherwise it returns ``(filename, title)`` where ``title`` is
	``bib_entry`` cut off shortly after the item title, and ``filename`` is
	that title with everything matched by ``CLEANR`` removed, spaces turned
	into hyphens and ``.md`` appended.
	"""
	haystack = bib_entry.lower()
	item_title = data_csl['title']

	start = haystack.find(item_title.lower())
	if start == -1:
		return None

	# Books keep 4 extra characters after the title, everything else 1 --
	# presumably the closing markup/punctuation the HTML formatter emits
	# around the title; TODO confirm against the CSL style output.
	if data_csl['type'] == 'book':
		trailing = 4
	else:
		trailing = 1
	end = start + len(item_title) + trailing

	title = bib_entry[:end]
	stem = CLEANR.sub('', title).replace(' ', '-')

	return stem + ".md", title


# Script entry point: for every Zotero collection, render its items as a
# bibliography and create or interactively update one markdown "reading"
# file per item under <content>/bibliography/<collection name>/.
if __name__ == "__main__":

	conf = utils.load_conf()

	z = zotero.Zotero(conf['zotero_group_id'], conf['zotero_lib_type'], conf['zotero_api_key'])

	# the style path does not depend on the collection, so build it once
	style_file = pathlib.Path(conf['template']) / 'csl' / 'chicago-author-date.csl'

	for d in z.collections():
		collection_name = d['data']['name']
		collection_key = d['key']
		print("---")

		# request the items of this collection as CSL-JSON
		z.add_parameters(content='csljson')
		collection = z.collection_items(collection_key)
		collection.reverse()

		# compile bib

		src = citeproc.source.json.CiteProcJSON(json_data=collection)
		style = citeproc.CitationStylesStyle(style_file.absolute(), validate=False)
		bib = citeproc.CitationStylesBibliography(style=style, source=src, formatter=citeproc.formatter.html)

		# only items accepted by valid_bib_entry are registered and rendered
		entries = [citeproc.CitationItem(e['id']) for e in collection if valid_bib_entry(e)]
		bib.register(citeproc.Citation(entries))

		# Map each registered key to its rendered entry by pairing the two
		# sequences positionally (assumes both follow registration order --
		# TODO confirm against citeproc-py).
		kv = dict(zip(bib.keys, [str(e) for e in bib.style.render_bibliography(entries)]))

		# process collection and bib

		for e in collection:

			eid = e['id'].lower()

			# items that were filtered out above have no rendered entry
			if eid not in kv:
				continue

			bib_entry = kv[eid]

			# format_filename_title returns None when the item title cannot
			# be located in the rendered entry; skip instead of crashing on
			# the tuple unpacking.
			formatted = format_filename_title(e, bib_entry)
			if formatted is None:
				print(f"{CRED}skipping {eid}: title not found in rendered entry{CEND}")
				continue
			filename, title = formatted

			filepath = pathlib.Path(conf['content']) / "bibliography" / collection_name / filename

			if not filepath.exists():
				print(f"new reading: {title}")
				new = format_reading(title=title, desc=bib_entry)
				utils.save_file(filepath, frontmatter.dumps(new), mkdirs=True)

			else:
				prev, eq = compare_readings(filepath, title, bib_entry)

				if eq:
					print(f"reading {CVIOLET}{title}{CEND} already exists... continuing")
					continue
				print(f"updating reading: {CBLUE}{title}{CEND}")

				## selective update
				new = format_reading(title=title, desc=bib_entry)

				updated = update_reading(prev, new)
				utils.save_file(filepath, frontmatter.dumps(updated), overwrite=True)

				# report the title, like the other messages, not the raw CSL dict
				print(f"reading {title} updated")