python refactoring

2016-03-16 14:30:32 +01:00 · 2016-03-16 14:30:32 +01:00 · c19dc35e18
commit c19dc35e18
parent 5e4beb6b71
19 changed files with 287 additions and 260 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,5 @@
 CONFIG
-*.pyc
+*.pyc
+build
+dist
+*.egg-info
--- a/bin/iiindex
+++ b/bin/iiindex
@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+# Rebuilds the index file ('.indx') from the single *.mmd note found in the
+# current directory by passing both files to nnnotes.parse.run.
+
+import os, sys, glob
+
+from nnnotes import parse
+
+if __name__ == '__main__':
+
+	indexfile = '.indx'
+
+	# exactly one note is expected; anything else is ambiguous
+	notes = glob.glob('*.mmd')
+	if len(notes) > 1:
+		sys.exit('More than one *.mmd file in current directory. No obvious choice. Aborting.')
+	elif len(notes) < 1:
+		sys.exit('No *.mmd in current directory.')
+
+	# the note is only read (parse used to be fed from stdin), so a
+	# read-only handle is enough and also works on write-protected notes
+	try:
+		note = open(notes[0], 'r')
+	except IOError:
+		sys.exit('Cannot open ' + notes[0])
+
+	# make sure both files are closed whatever parse.run does
+	try:
+		with open(indexfile, 'w') as indx:
+			parse.run(note, indx)
+	finally:
+		note.close()
+
+
+
+
--- a/bin/iiinject
+++ b/bin/iiinject
@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+# Extracts the highlights of the single pdf in the current directory with
+# plfr and injects them in the single *.mmd note. When an index ('.indx')
+# already exists, only the difference computed by nnnotes.compare is injected.
+
+import os, sys, glob, json, subprocess
+
+from nnnotes import compare, inject
+
+def plfr_dump(pdf, out):
+	# Runs `plfr -json <pdf>` and stores its output in the file `out`.
+	# The command is given as a list so that spaces or shell characters
+	# in the pdf name never go through a shell.
+	with open(out, 'w') as fp:
+		try:
+			subprocess.call(['plfr', '-json', pdf], stdout=fp)
+		except OSError:
+			sys.exit('Cannot run plfr. Aborting.')
+
+if __name__ == '__main__':
+
+	indexfile = '.indx'
+	tmp = '.tmp'
+
+	pdfs = glob.glob('*.pdf')
+	if len(pdfs) > 1:
+		sys.exit('More than one pdf in current directory. No obvious choice. Aborting.')
+
+	try:
+		if len(pdfs) == 1:
+			pdf = pdfs[0]
+			if os.path.isfile(indexfile):
+				plfr_dump(pdf, tmp)     				### relying on plfr
+				diff = compare.run(indexfile, tmp)		### new highlights in pdf?
+
+				# the temporary file now carries the difference only
+				with open(tmp, 'w') as fptmp:
+					json.dump(diff, fptmp)
+				indexfile = tmp
+
+			else:
+				plfr_dump(pdf, indexfile)
+
+		# exactly one note is expected; anything else is ambiguous
+		notes = glob.glob('*.mmd')
+		if len(notes) > 1:
+			sys.exit('More than one *.mmd file in current directory. No obvious choice. Aborting.')
+		elif len(notes) < 1:
+			sys.exit('No *.mmd in current directory.')
+
+		note = notes[0]
+		inject.run(note, indexfile)
+
+	finally:
+		# remove the temporary file on every exit path, sys.exit included
+		if os.path.isfile(tmp):
+			os.remove(tmp)
--- a/bin/nnnote
+++ b/bin/nnnote
@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+import os, sys, argparse, shutil, subprocess
+import getpass, time
+
+from nnnotes import TEMPLATE_PATH
+
+def yes_no(question):
+	# Writes `question` on stdout and reads the answer from stdin.
+	# Returns True for 'yes', 'y', 'ye' or an empty answer and False for
+	# 'no' or 'n' (case-insensitive). Any other answer prints a reminder
+	# and asks again, so the caller always gets a real boolean.
+
+	yes = set(['yes','y', 'ye', ''])
+	no = set(['no','n'])
+
+	while True:
+		sys.stdout.write(question)
+		choice = raw_input().lower()
+		if choice in yes:
+			return True
+		if choice in no:
+			return False
+		# newline so that the repeated question starts on its own line
+		sys.stdout.write("Please respond with 'yes' or 'no'\n")
+
+def check_note_exists(title):
+	# Tells whether a directory named `title` is already there.
+	# Returns False when it is not. When it is, the user is asked for
+	# confirmation: True is returned on approval, otherwise the program exits.
+
+	if not os.path.isdir(title):
+		return False
+
+	proceed = yes_no('The note already exists in the current directory.\nDo you want to proceed and erase the current note? [y/n] ')
+	if proceed:
+		return True
+	sys.exit('Notes already exists. Aborting.')
+
+
+if __name__ == '__main__':
+
+	# command line: only the title is mandatory
+	p = argparse.ArgumentParser()
+	p.add_argument('-t', '--title', action="store", help="title of the note", required=True)
+	p.add_argument('-f', '--file', action="store", help="(pdf) file to extract notes from")
+	p.add_argument('-b', '--bibtex', action="store", help="bibtex file containing bibliographical information")
+	p.add_argument('-i', '--bibtexitem', action="store", help="name of the item in the bibtex file")
+	p.add_argument('-s', '--style', action="store", help="bibliographic style")
+
+	args = p.parse_args()
+
+	# resolve the input files now: after the chdir below, relative paths
+	# given on the command line would be looked up inside the note directory
+	# and silently ignored
+	if args.file is not None:
+		args.file = os.path.abspath(args.file)
+	if args.bibtex is not None:
+		args.bibtex = os.path.abspath(args.bibtex)
+
+	if not check_note_exists(args.title):
+		# make directory for the note
+		os.makedirs(args.title)
+
+	# copy template files -- this needs to change....
+	#home = os.path.dirname(os.path.realpath(__file__))
+	os.system('cp -aR ' + TEMPLATE_PATH + '/* ' + args.title)
+
+	# everything below works on the files inside the note directory
+	os.chdir(args.title)
+
+	# copy note file if it exists
+	if args.file is not None and os.path.isfile(args.file):
+		shutil.copy2(args.file, os.path.basename(args.file))
+
+	# style
+	style_arg = ''
+	if args.style is not None:
+		os.system('sed -i.bak "s#CSL\ :=#CSL\ := ' + args.style + '#g" Makefile')
+		style_arg = '-s ' + args.style
+
+	# bibtex item
+	bibtexitem_arg = ''
+	if args.bibtexitem is not None:
+		bibtexitem_arg = '-i ' + args.bibtexitem
+
+	# generate bibliographic element
+	if args.bibtex is not None and os.path.isfile(args.bibtex):
+		os.system('sed -i.bak "s#BIB\ :=#BIB\ := ' + args.bibtex + '#g" Makefile')
+		ref = '> ' + subprocess.check_output('csl_unsorted ' + args.bibtex + ' ' + bibtexitem_arg + ' ' + style_arg + ' -f md', shell=True).rstrip()
+		os.system('sed -i.bak "s~*MACHINE-REF*~' + ref + '~g ; s~%\ title~%\ ' + ref + '~g" notes.mmd')
+
+	# fill in the author (current login name) and today's date
+	author = getpass.getuser()
+	date = time.strftime("%d/%m/%Y")
+
+	os.system('sed -i.bak "s~%\ author~%\ ' + author + '~g ; s~%\ date~%\ ' + date + '~g" notes.mmd')
+
+	# cleanup
+	os.system('rm *.bak')
+	os.chdir('..')
+
--- a/nnnotes/__init__.py
+++ b/nnnotes/__init__.py
@ -0,0 +1,3 @@
+import os
+
+TEMPLATE_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'template')
--- a/nnnotes/compare.py
+++ b/nnnotes/compare.py
@ -1,4 +1,8 @@
 #!/usr/bin/env python
+
+# compares two json formatted indexes
+# and returns the difference
+
 import sys, os, string, json
 from operator import attrgetter

@ -8,8 +12,8 @@ fp2 = ''
 def difference(data1, data2):
 	output = {'QUOTES' : [], 'NOTES' : []}

-	d1 = {i['quote']: i for i in data1['QUOTES']}
-	d2 = {i['quote']: i for i in data2['QUOTES']}
+	d1 = {i['quote'].rstrip(): i for i in data1['QUOTES']}
+	d2 = {i['quote'].rstrip(): i for i in data2['QUOTES']}

 	#create sets
 	s1 = set(d1.keys())
@ -47,28 +51,32 @@ def open_file(p):
 		sys.exit('File %s does not exists... Aborting.' % p)
 	return open(p, 'rb')

-if __name__ == '__main__':
-	if len(sys.argv) < 2:
-		sys.exit('No input file... Aborting.')
-	try:
-		fp1 = open_file(sys.argv[1])
-	except:
-		sys.exit("Can't open file " + sys.argv[1] + ". Aborting.")
-	if len(sys.argv) < 3:
-		fp2 = sys.stdin
-	else:
-		try:
-			fp2 = open_file(sys.argv[2])
-		except:
-			sys.exit("Can't open file " + sys.argv[2] + ". Aborting.")
+def run(filename1, filename2):

+	#open files
+	try:
+		fp1 = open_file(filename1)
+	except:
+		sys.exit("Can't open file " + filename1 + ". Aborting.")
+
+	try:
+		fp2 = open_file(filename2)
+	except:
+		# it may be stdin
+		try:
+			fp2 = filename2
+			fp2.tell()
+		except:
+			sys.exit("Can't open file " + filename2 + ". Aborting.")
+
+	#read data
 	try:
 		sdata = fp1.read()
 		data1 = json.loads(sdata)
 	except:
-		e = "<compare> Error loading data from" + sys.argv[1] + ". Aborting.\n"
+		e = "<compare> Error loading data from" + filename1 + ". Aborting.\n"
 		if sdata:
-			e += "Traceback: " + sdata1
+			e += "Traceback: " + sdata
 		fp2.close()
 		sys.exit(e)		
 	finally:
@ -86,11 +94,21 @@ if __name__ == '__main__':
 	finally:
 		fp2.close()

+	#process
 	data = difference(data1, data2)

 	data['QUOTES'] = sorted(data['QUOTES'], key=lambda entry: int(entry['pp']))
 	data['NOTES'] = sorted(data['NOTES'], key=lambda entry: int(entry['pp']))

-	json.dump(data, sys.stdout)
+	#dump
+	return data
+
+#main allows unix piping
+if __name__ == '__main__':
+	# the first index is a file name, the second one is read from stdin;
+	# the difference is written as json on stdout
+	if len(sys.argv) >= 2:
+		json.dump(run(sys.argv[1], sys.stdin), sys.stdout)
+	else:
+		sys.exit('No input file... Aborting.')
+
+


--- a/nnnotes/inject.py
+++ b/nnnotes/inject.py
@ -1,4 +1,9 @@
 #!/usr/bin/env python
+
+# reads (1) a formatted md file, (2) json formatted index
+# and injects (sorts and appends) in the md file the information 
+# contained in the index
+
 from statemachine import StateMachine
 import sys, os, string, json, shutil, codecs, traceback

@ -14,7 +19,6 @@ def error(c):
 	sys.stderr.write('Unidentifiable line:\n'+ l)

 def eof(c):
-	print "eof"
 	return

 def parse(c):
@ -120,10 +124,13 @@ def emit_quotes(list):

 def emit_quote(data):
 	emit_line("<!--page " + data['pp'] + "-->\n\n")
-	emit_line(">\"" + data['quote'] + "\" pp." + data['pp'] + "\n")
+	emit_line(">\"" + data['quote'] + "\" p." + data['pp'] + "\n")
 	emit_line('\n')

 def emit_remaining_notes():
+
+	print "emit_remaining_notes"
+
 	rest = []
 	for i in notes:
 		rest.extend(notes[i])	
@ -168,25 +175,27 @@ def backupfile(p):
 	shutil.copy2(p, bak)
 	return codecs.open(bak, 'r', 'utf-8')

+def run(filename1, filename2):
+
+	global fileout, fileref, notes, quotes, notes_cnt

-if __name__ == '__main__':
-	if len(sys.argv) < 2:
-		sys.exit('No input file... Aborting.')
 	# fp1 should be the incoming .mmd file
-	try:
-		fileref = backupfile(sys.argv[1])
-		fileout = open_fileoutput(sys.argv[1])
+	try:		
+		fileref = backupfile(filename1)
+		fileout = open_fileoutput(filename1)
 		fileout.seek(0)
 	except:
-		sys.exit("Can't open file " + sys.argv[1] + ". Aborting.")
+		sys.exit("Can't open file " + filename1 + ". Aborting.")

-	if len(sys.argv) < 3:
-		fp2 = sys.stdin
-	else:
+	try:
+		fp2 = open_file(filename2)
+	except:
+		# it may be stdin
 		try:
-			fp2 = open_file(sys.argv[2])
+			fp2 = filename2
+			fp2.tell()
 		except:
-			sys.exit("Can't open file " + sys.argv[2] + ". Aborting.")
+			sys.exit("Can't open file " + filename2 + ". Aborting.")

 	# fp2 should be the incoming (json) data to inject in fp1

@ -211,8 +220,6 @@ if __name__ == '__main__':
 	quotes = reoder(data['QUOTES'])
 	notes = reoder(data['NOTES'])

-	print quotes
-
 	notes_cnt = 0

 	try:
@ -234,7 +241,22 @@ if __name__ == '__main__':
 	finally:
 		fileout.close()
 		fileref.close()
-		sys.exit(trace)
+		return trace
+			
+#main allows unix piping
+if __name__ == '__main__':
+	# the note file (first argument) is mandatory
+	if len(sys.argv) < 2:
+		sys.exit('No input file... Aborting.')
+
+	# without a second argument the json data is taken from stdin,
+	# otherwise the second argument is handed to run() as given
+	if len(sys.argv) < 3:
+		fp2 = sys.stdin
+	else:
+		fp2 = sys.argv[2]
+
+	# whatever run() returns is passed on to sys.exit
+	trace = run(sys.argv[1], fp2)
+	sys.exit(trace)
+
+



--- a/nnnotes/parse.py
+++ b/nnnotes/parse.py
@ -1,5 +1,8 @@
 #!/usr/bin/env python

+# parses the information contained in a formatted md file
+# and constructs a json formatted index
+
 from statemachine import StateMachine
 import sys, string, re, json

@ -17,14 +20,14 @@ def is_tag_identifier(line):

 markups = {'QUOTES' : (is_quote_identifier, 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : (is_note_identifier, '#', 'tags', 'note', 'fpc')}
 output = {'QUOTES' : [], 'NOTES' : []}
+fpindex = None

 def error(c):
 	fp, l = c
 	sys.stderr.write('Unidentifiable line:\n'+ l)

 def eof(c):
-	fpindx = open('.indx','wb')
-	json.dump(output, fpindx)
+	json.dump(output, fpindex)

 def parse(c):
 	fp, l = c
@ -123,12 +126,16 @@ def escape_quote(line):
 	if(not line.strip().startswith('>')):
 		return line
 	l = re.sub('\"*\"', '', line.strip()[1:])
-	return re.sub('pp.[0-9]+', '', l)
+	return re.sub('p.[0-9]+', '', l)

 def escape_note(line):
 	return re.sub('^[0-9]+.', '', line).strip()

-if __name__ == '__main__':
+def run(fpin, fpout):
+
+	global fpindex
+
+	fpindex = fpout
 	m = StateMachine();
 	m.add_state(parse)
 	m.add_state(NOTES)
@ -137,4 +144,9 @@ if __name__ == '__main__':
 	m.add_state(error, end_state=1)
 	m.add_state(eof, end_state=1)
 	m.set_start(parse)
-	m.run((sys.stdin, ''))
+	m.run((fpin, ''))	
+
+#main allows unix piping
+if __name__ == '__main__':
+	# the note comes from stdin, the index goes to '.indx'; the with-block
+	# makes sure the index is flushed and closed once run() is done
+	with open('.indx','wb') as fpindx:
+		run(sys.stdin, fpindx)
--- a/nnnotes/statemachine.py
+++ b/nnnotes/statemachine.py
--- a/nnnotes/template/Makefile
+++ b/nnnotes/template/Makefile
@ -16,22 +16,25 @@ $(OUT)%.pdf : %.mmd
 #html
 HTML := $(patsubst %.mmd,$(OUT)%.html,$(wildcard *.mmd))

-$(OUT)%.html : %.mmd
+$(OUT)%.html: %.mmd
 	pandoc $< -s -t html5 --template=p/nnnote-template.html5 --filter pandoc-citeproc --csl $(CSL) --bibliography $(BIB) -o $@

-all : dir $(HTML) index
+all: dir $(HTML) index

-index :
-	parse.py < $(wildcard *.mmd)
+index:
+	iiindex
+
+inject:
+	iiinject
                        
-clean :
+clean:
 	rm -f $(HTML)
 	rm -f $(PDF)

 neat:
 	rm *.bak

-rebuild : clean all
+rebuild: clean all

-dir : 
-	mkdir -p $(OUT)
+dir: 
+	mkdir -p $(OUT)
--- a/nnnotes/template/notes.mmd
+++ b/nnnotes/template/notes.mmd
--- a/nnnotes/template/p/nnnote-template.html5
+++ b/nnnotes/template/p/nnnote-template.html5
--- a/parse/compare.sh
+++ b/parse/compare.sh
@ -1,18 +0,0 @@
-#!/bin/bash
-
-INDXFILE='.indx'
-
-for i in *.mmd; do
-    f=$i
-done
-
-if [[ ! -f $INDXFILE ]]; then
-	make --quiet index
-elif [[ $f -nt $INDXFILE ]]; then
-	rm $INDXFILE
-	make --quiet index
-fi	
-
-#parse-skim.sh | compare.py .indx
-
-parse-plfr.sh | compare.py .indx
--- a/parse/inject.sh
+++ b/parse/inject.sh
@ -1,16 +0,0 @@
-#!/bin/bash
-
-for i in *.mmd; do
-    f=$i
-done
-
-if [[ -z '$f' ]]; then
-	echo "No mmd file (markdown source file) in directory... Aborting.";
-	exit;
-fi
-
-# inject new nnnotes in source file
-compare.sh | inject.py $f
-
-# update index
-make --quiet index
--- a/parse/parse-plfr.sh
+++ b/parse/parse-plfr.sh
@ -1,13 +0,0 @@
-#!/bin/bash
-
-for i in *.pdf; do
-    f=$i
-done
-
-if [[ -d '$f' ]]; then
-	echo "$f No pdf file in directory... Aborting.";
-	exit;
-fi
-
-plfr -json $f
-
--- a/parse/parse-skim.py
+++ b/parse/parse-skim.py
@ -1,77 +0,0 @@
-#!/usr/bin/python
-
-from statemachine import StateMachine
-import sys, string, json
-
-output = {'QUOTES' : [], 'NOTES' : []}
-
-def error(c):
-	fp, l = c
-	sys.stderr.write('Unidentifiable line:\n'+ l)
-
-def eof(c):
-	print json.dumps(output, sys.stdout)
-
-def parse(c):
-	fp, l = c
-	while 1:
-		line = fp.readline()
-		if not line: return eof, (fp, line)
-		if line[:1] == '*': return section(line), (fp, line)
-		else: continue	
-
-def highlight(c):
-	fp, l = c
-	p = page(l)
-	text = fp.readline()
-	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
-	return parse(c)
-
-def anchored_note(c):
-	fp, l = c
-	p = page(l)
-	text = fp.readline()
-	fp.readline()
-	note = fp.readline()	
-	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
-	output['NOTES'].append({'pp' : p, 'note' : note.strip()})
-	return parse(c)
-
-def box(c):
-	fp, l = c
-	p = page(l)
-	text = fp.readline()	
-	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
-	return parse(c)
-
-def text_note(c):
-	fp, l = c
-	p = page(l)
-	text = fp.readline()
-	output['NOTES'].append({'pp' : p, 'note' : text.strip()})
-	return parse(c)
-
-## helper fncts
-def section(line):
-	line = string.upper(line)
-	if string.find(line, 'HIGHLIGHT') >= 0: return highlight
-	elif string.find(line, 'ANCHORED NOTE') >= 0: return anchored_note
-	elif string.find(line, 'BOX') >= 0: return box
-	elif string.find(line, 'TEXT NOTE') >= 0: return text_note
-	else: return parse
-
-def page(line):	
-	return line.rstrip('\n').split(',')[-1].split(' ')[-1]
-
-if __name__ == '__main__':
-	m = StateMachine();
-	m.add_state(parse)
-	m.add_state(highlight)
-	m.add_state(anchored_note)
-	m.add_state(box)
-	m.add_state(text_note)
-	m.add_state(error, end_state=1)
-	m.add_state(eof, end_state=1)
-	m.set_start(parse)
-	m.run((sys.stdin, ''))
-
--- a/parse/parse-skim.sh
+++ b/parse/parse-skim.sh
@ -1,23 +0,0 @@
-#!/bin/bash
-
-for i in *.pdf; do
-    f=$i
-done
-
-if [[ -d '$f' ]]; then
-	echo "$f No pdf file in directory... Aborting.";
-	exit;
-fi
-
-filename="${f%.*}".txt
-
-skimnotes get -format txt $f
-
-if [[ ! -f $filename ]]; then
-	echo "No skim notes in pdf $f. Aborting.";
-	exit;
-fi
-
-parse-skim.py < $filename
-
-rm $filename
--- a/parse/refactorbib.py
+++ b/parse/refactorbib.py
@ -1,63 +0,0 @@
-#!/usr/bin/env python
-import sys, os, json
-from optparse import OptionParser
-
-def open_file(p):
-	if not os.path.exists(p):
-		sys.exit('File %s does not exists... Aborting.' % p)
-	return open(p, 'rb')
-
-def refactor(data):
-
-	data_out = {}
-	for d in data:
-		nid = ''
-		if(d['author']):
-			nid = d['author'][0]['family'] + d['issued']['date-parts'][0][0]
-		elif(d['editor']):
-			nid = d['editor'][0]['family'] + d['issued']['date-parts'][0][0]
-
-		if nid in data_out:
-			for c in range(97, 122):
-				nid = nid + chr(c)
-				if not nid in data:
-					break
-		data_out[nid] = d
-
-	return data_out
-
-
-if __name__ == '__main__':
-
-	p = OptionParser();
-	p.add_option('-i', '--index', action="store_true", help="prints out index")
-
-	options, args = p.parse_args()
-
-	if len(args) < 1:
-		sys.exit('No input file... Aborting.')
-	try:
-		fp = open_file(args[0])
-	except:
-		sys.exit("Can't open file " + args[0] + ". Aborting.")
-
-	try:
-		data = json.loads(fp.read())
-	except:
-		e = "<refactorbib> Error loading data from" + sys.argv[1] + ". Aborting.\n"
-		if sdata:
-			e += "Traceback: " + sdata1
-		sys.exit(e)		
-	finally:
-		fp.close()
-
-	out = refactor(data)
-
-	if options.index:
-		for e in out.keys():
-			print '> ' + e + ' - ' + out[e]['title'] + '  '
-	else:
-		print json.dumps(out, sort_keys=True, indent=2, separators=(',', ': '))
-
-
-
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+# setuptools packaging for nnnotes: installs the package with its template
+# files and the nnnote, iiindex and iiinject scripts.
+
+import os, sys
+from setuptools import setup, find_packages
+
+PACKAGE = 'nnnotes'
+
+# README is looked up next to this file so that the build does not depend
+# on the directory setup.py is started from
+HERE = os.path.abspath(os.path.dirname(__file__))
+with open(os.path.join(HERE, 'README')) as readme:
+    README = readme.read()
+
+setup(
+	name = 'nnnotes',
+	version = 'v0',
+	packages = find_packages(),
+	package_data = {PACKAGE: ['template/Makefile', 'template/*.mmd', 'template/p/*']},
+	scripts = ['bin/nnnote', 'bin/iiindex', 'bin/iiinject'],
+	provides=[PACKAGE],
+	author = 'gauthiier',
+	author_email = 'd@gauthiier.info',
+	url = 'https://github.com/gauthiier/nnnotes',
+	long_description=README,
+	# classifiers have to match the official trove list verbatim
+	classifiers=[
+        "Topic :: Utilities",
+        "License :: OSI Approved :: MIT License",
+    ]
+)