inject from skim

2014-08-25 12:54:27 +02:00
parent a6e704c3f8
commit c3962aa5ad
8 changed files with 334 additions and 13 deletions
@@ -63,6 +63,8 @@ cp -aR $WHERE/template/* $TITLE
 cd $TITLE
 touch notes.mmd
 # file exists? copy to output
 [ -z "$FILE" ] && echo "No file to copy. Make sure to reference in text in the note!" || curl -O $FILE 
@@ -0,0 +1,74 @@
 #!/usr/bin/env python
 import sys, os, string, json
 # Module-level placeholders; the __main__ block rebinds both names to open
 # file objects (fp2 may be sys.stdin) before they are read.
 fp1 = ''
 fp2 = ''
 def difference(data1, data2):
 	"""Return the QUOTES and NOTES of data2 that are missing from data1.
 	Both arguments are dicts with 'QUOTES' and 'NOTES' lists; quotes are
 	matched on their 'quote' text and notes on their 'note' text.  This is a
 	one-way difference (data2 minus data1), not a symmetric one.
 	Entries come back in the order they first appear in data2, so the result
 	is deterministic.  When data2 repeats the same text, it is reported once
 	and the last such entry is the one returned.
 	"""
 	def _missing(section, field):
 		# Texts already present in the reference document.
 		seen = set(entry[field] for entry in data1[section])
 		# Last entry wins when data2 holds the same text more than once.
 		latest = {entry[field]: entry for entry in data2[section]}
 		fresh = []
 		for entry in data2[section]:
 			text = entry[field]
 			if text not in seen:
 				# Remember it so a repeated text is emitted only once.
 				seen.add(text)
 				fresh.append(latest[text])
 		return fresh
 	return {
 		'QUOTES': _missing('QUOTES', 'quote'),
 		'NOTES': _missing('NOTES', 'note'),
 	}
 def open_file(p):
 	"""Open the existing file at path p for binary reading.
 	Terminates the program through sys.exit with a message when p does not
 	exist.
 	"""
 	if os.path.exists(p):
 		return open(p, 'rb')
 	sys.exit('File %s does not exists... Aborting.' % p)
 if __name__ == '__main__':
 	# Usage: <script> REFERENCE_JSON [CANDIDATE_JSON]
 	# Without the second argument the candidate JSON is read from stdin.
 	args = sys.argv[1:]
 	if not args:
 		sys.exit('No input file... Aborting.')
 	fp1 = open_file(args[0])
 	fp2 = open_file(args[1]) if len(args) > 1 else sys.stdin
 	data1 = json.load(fp1)
 	data2 = json.load(fp2)
 	fp1.close()
 	fp2.close()
 	# Write the entries found only in the second document to stdout as JSON.
 	json.dump(difference(data1, data2), sys.stdout)
@@ -0,0 +1,16 @@
 #!/bin/bash
 # Refresh the note index if needed, then pipe the output of parse-skim.sh
 # into compare.py together with the index file.
 INDXFILE='.indx'
 # Keep the last *.mmd name the glob yields.
 for i in *.mmd; do
    f=$i
 done
 # (Re)build the index when it is missing or older than the markdown source.
 if [[ ! -f $INDXFILE ]]; then
 	make --quiet index
 elif [[ $f -nt $INDXFILE ]]; then
 	rm "$INDXFILE"
 	make --quiet index
 fi
 # Use the same variable as above instead of repeating the literal file name.
 parse-skim.sh | compare.py "$INDXFILE"
@@ -0,0 +1,191 @@
 #!/usr/bin/env python
 from statemachine import StateMachine
 import sys, os, string, json, shutil, codecs
 # Largest Python 2 plain int; not referenced anywhere else in the visible code.
 quote_nbr = sys.maxint
 # Placeholders for the file handles; the __main__ block rebinds fileout to the
 # .mmd opened for update and fileref to its read-only '.bak' copy.
 fileout = ''
 fileref = ''
 # Pending annotations.  They start as empty lists but the __main__ block
 # rebinds both to the dicts built by reoder() (keyed by the 'pp' value).
 notes = []
 quotes = []
 def error(c):
 	"""Report the line held in the cargo pair c on stderr."""
 	_, offending = c
 	message = 'Unidentifiable line:\n' + offending
 	sys.stderr.write(message)
 def eof(c):
 	"""End state: print "eof" on stdout and return None; the cargo c is ignored."""
 	# Call form prints the same text on Python 2 and also parses on Python 3.
 	print("eof")
 	return
 def parse(c):
 	"""Copy lines to the output until a '##' heading or end of file.
 	Returns the next state and its cargo: eof when the input is exhausted,
 	otherwise whatever section() selects for the heading just read.
 	"""
 	fp, _ = c
 	while True:
 		line = fp.readline()
 		if not line:
 			return eof, (fp, line)
 		if line[:2] == '##':
 			return section(line), (fp, line)
 		emit_line(line)
 def QUOTES(c):
 	"""State for the quotes section: copy lines and stop at page markers.
 	Returns the next state with its cargo:
 	- eof at end of input, after flushing every pending quote;
 	- process_quote for a line that starts with "page" (any case, leading
 	  blanks ignored);
 	- the state chosen by section() for a '##' heading, after flushing.
 	"""
 	fp, _ = c
 	while True:
 		line = fp.readline()
 		if not line:
 			emit_remaining_quotes()
 			return eof, (fp, line)
 		# Only a line that begins with the marker counts; a quote that merely
 		# mentions "page" must be copied through, not parsed as a page number.
 		if line.strip().upper().startswith('PAGE'):
 			return process_quote, (fp, line)
 		if line[:2] == '##':
 			emit_remaining_quotes()
 			return section(line), (fp, line)
 		emit_line(line)
 def NOTES(c):
 	"""State for the notes section: copy lines and stop at note markers.
 	Returns the next state with its cargo:
 	- eof at end of input, after a blank line and every pending note;
 	- process_note for a line that starts with "note" (any case, leading
 	  blanks ignored);
 	- the state chosen by section() for a '##' heading, after the same flush.
 	"""
 	fp, _ = c
 	while True:
 		line = fp.readline()
 		if not line:
 			emit_line('\n')
 			emit_remaining_notes()
 			return eof, (fp, line)
 		# Match the marker at the start of the line only, so that text such as
 		# "footnote" or a "## Footnotes" heading is not taken for a note marker.
 		if line.strip().upper().startswith('NOTE'):
 			return process_note, (fp, line)
 		if line[:2] == '##':
 			emit_line('\n')
 			emit_remaining_notes()
 			return section(line), (fp, line)
 		emit_line(line)
 def process_quote(c):
 	"""Handle a page-marker line inside the quotes section.
 	The page number is the last whitespace-separated token of the line.  All
 	pending quotes whose page is strictly lower are written first, in
 	ascending page order, then the marker line itself; control goes back to
 	QUOTES.  A line whose last token is not an integer is copied through
 	unchanged instead of aborting the run.
 	"""
 	fp, l = c
 	try:
 		ppnbr = int(l.split()[-1])
 	except (ValueError, IndexError):
 		# Not a usable "page <n>" marker: treat it as ordinary text.
 		emit_line(l)
 		return QUOTES(c)
 	# Sort numerically so earlier pages are injected before later ones.
 	for i in sorted(quotes, key=int):
 		if int(i) < ppnbr:
 			emit_quotes(quotes[i])
 	emit_line(l)
 	return QUOTES(c)
 def process_note(c):
 	"""Write the note-marker line from the cargo c, then continue in NOTES."""
 	_, marker = c
 	emit_line(marker)
 	return NOTES(c)
 ####################
 def section(line):
 	"""Write a heading line and pick the state that handles its section.
 	A heading containing NOTES or QUOTES (any case) selects the matching
 	state, but only while there is something pending to inject for it;
 	every other case falls back to parse.  NOTES is tested first.
 	"""
 	emit_line(line)
 	heading = line.upper()
 	if 'NOTES' in heading:
 		return NOTES if notes else parse
 	if 'QUOTES' in heading:
 		return QUOTES if quotes else parse
 	# REFERENCE headings and unknown headings are both handled by parse.
 	return parse
 def emit_remaining_quotes():
 	"""Write every quote still held in the module-level quotes mapping."""
 	# Flatten into a fresh list: emit_quotes() empties the list it is given,
 	# so the per-page lists themselves are left untouched.
 	pending = [entry for page in quotes for entry in quotes[page]]
 	emit_quotes(pending)
 def emit_quotes(list):
 	"""Write the given quotes from last to first, emptying the list in place."""
 	for _ in range(len(list)):
 		emit_quote(list.pop())
 def emit_quote(data):
 	"""Write one quote: a "page <pp>" line, a blank line, the '>' quote line and a blank line."""
 	emit_line("".join(("page ", data['pp'], "\n\n")))
 	emit_line("".join((">", data['quote'], "\n")))
 	emit_line('\n')
 def emit_remaining_notes():
 	"""Write every note held in the module-level notes mapping."""
 	# Collect everything first, then write, one note at a time.
 	pending = [entry for page in notes for entry in notes[page]]
 	for entry in pending:
 		emit_note(entry)
 def emit_note(data):
 	"""Write one note: a "note: <pp>" line, a blank line, the note text and a blank line."""
 	emit_line("".join(("note: ", data['pp'], "\n\n")))
 	emit_line("".join((data['note'], "\n")))
 	emit_line('\n')
 def emit_line(l):
 	"""Append the text l to the module-level output handle fileout."""
 	# No explicit encoding here: l is handed to fileout.write() as is.
 	sink = fileout
 	sink.write(l)
 def reoder(q):
 	"""Group the entries of q by their 'pp' value.
 	Returns a dict mapping each 'pp' to the list of entries carrying it.
 	The input list is consumed: entries are popped from its end, so q is
 	empty afterwards and each group lists its entries in reverse input order.
 	"""
 	grouped = {}
 	while q:
 		entry = q.pop()
 		grouped.setdefault(entry['pp'], []).append(entry)
 	return grouped
 def open_file(p):
 	"""Open the existing file at path p for reading, decoding it as UTF-8.
 	Terminates the program through sys.exit with a message when p does not
 	exist.
 	"""
 	if os.path.exists(p):
 		return codecs.open(p, 'rb', 'utf-8')
 	sys.exit('File %s does not exists... Aborting.' % p)
 def open_fileoutput(p):
 	"""Open the existing file at path p for in-place update ('r+') as UTF-8.
 	The file is not truncated.  Terminates the program through sys.exit with
 	a message when p does not exist.
 	"""
 	if os.path.exists(p):
 		return codecs.open(p, 'r+', 'utf-8')
 	sys.exit('File %s does not exists... Aborting.' % p)
 def backupfile(p):
 	"""Copy the file at p to '<p>.bak' and open that copy for UTF-8 reading.
 	The copy is made with shutil.copy2.  Terminates the program through
 	sys.exit with a message when p does not exist.
 	"""
 	if os.path.exists(p):
 		backup_path = p + '.bak'
 		shutil.copy2(p, backup_path)
 		return codecs.open(backup_path, 'r', 'utf-8')
 	sys.exit('File %s does not exists... Aborting.' % p)
 if __name__ == '__main__':
 	# Usage: <script> NOTE_FILE [JSON_FILE]
 	# Injects the QUOTES/NOTES found in the JSON data (read from stdin when no
 	# second argument is given) into NOTE_FILE.
 	if len(sys.argv) < 2:
 		sys.exit('No input file... Aborting.')
 	# fileref: read-only '.bak' copy of the note file, used as parser input.
 	# fileout: the note file itself, rewritten in place from offset 0.
 	fileref = backupfile(sys.argv[1])
 	fileout = open_fileoutput(sys.argv[1])
 	try:
 		fileout.seek(0)
 		if len(sys.argv) < 3:
 			fp2 = sys.stdin
 		else:
 			fp2 = open_file(sys.argv[2])
 		# fp2 should be the incoming (json) data to inject in the note file
 		try:
 			data = json.load(fp2)
 		finally:
 			fp2.close()
 		print(data)
 		if not data['QUOTES'] and not data['NOTES']:
 			print("Document up-to-date.")
 			sys.exit(0)
 		# Group the pending entries by their 'pp' value.
 		quotes = reoder(data['QUOTES'])
 		notes = reoder(data['NOTES'])
 		m = StateMachine()
 		m.add_state(parse)
 		m.add_state(NOTES)
 		m.add_state(QUOTES)
 		m.add_state(process_quote)
 		m.add_state(process_note)
 		m.add_state(error, end_state=1)
 		m.add_state(eof, end_state=1)
 		m.set_start(parse)
 		m.run((fileref, ''))
 	finally:
 		# Runs on every exit path, including sys.exit() and exceptions, so
 		# neither handle is left open.
 		fileout.close()
 		fileref.close()
@@ -0,0 +1,16 @@
 #!/bin/bash
 # Inject new annotations into the markdown note of the current directory,
 # then rebuild the index.
 f=''
 # Keep the last *.mmd name the glob yields.
 for i in *.mmd; do
    f=$i
 done
 # When nothing matches, the glob stays literal ("*.mmd"), so test for a real
 # file.  The former `-z '$f'` tested a single-quoted literal and was never true.
 if [[ ! -f "$f" ]]; then
 	echo "No mmd file (markdown source file) in directory... Aborting." >&2
 	exit 1
 fi
 # inject new nnnotes in source file
 compare.sh | inject.py "$f"
 # update index
 make --quiet index
@@ -24,7 +24,7 @@ def highlight(c):
 	fp, l = c
 	p = page(l)
 	text = fp.readline()
-	output['QUOTES'].append({'pp' : p, 'quote' : text})
+	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
 	return parse(c)
 def anchored_note(c):
@@ -33,22 +33,22 @@ def anchored_note(c):
 	text = fp.readline()
 	fp.readline()
 	note = fp.readline()	
-	output['QUOTES'].append({'pp' : p, 'quote' : text})
+	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
-	output['NOTES'].append({'pp' : p, 'note' : note})
+	output['NOTES'].append({'pp' : p, 'note' : note.strip()})
 	return parse(c)
 def box(c):
 	fp, l = c
 	p = page(l)
 	text = fp.readline()	
-	output['QUOTES'].append({'pp' : p, 'quote' : text})
+	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
 	return parse(c)
 def text_note(c):
 	fp, l = c
 	p = page(l)
 	text = fp.readline()
-	output['NOTES'].append({'pp' : p, 'note' : text})
+	output['NOTES'].append({'pp' : p, 'note' : text.strip()})
 	return parse(c)
 ## helper fncts
@@ -0,0 +1,18 @@
 #!/bin/bash
 # Export the Skim notes of the PDF in the current directory as text and feed
 # them to parse-skim.py.
 f=''
 # Keep the last *.pdf name the glob yields.
 for i in *.pdf; do
    f=$i
 done
 # When nothing matches, the glob stays literal ("*.pdf"), so test for a real
 # file.  The former `-z '$f'` tested a single-quoted literal and was never true.
 # The message goes to stderr because stdout is meant to carry the parser output.
 if [[ ! -f "$f" ]]; then
 	echo "No pdf file in directory... Aborting." >&2
 	exit 1
 fi
 filename="${f%.*}"
 # Quoted so that file names containing blanks stay a single argument.
 skimnotes get -format txt "$f"
 parse-skim.py < "$filename.txt"
 #rm $filename.txt
@@ -25,20 +25,20 @@ def parse(c):
 def QUOTES(c):
 	fp, l = c
 	sys.stdout.write('QUOTES\n')		
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		elif string.find(string.upper(line), 'PAGE') >= 0: return segment, (fp, line, 'QUOTES', markups['QUOTES'])
+		elif line.strip().upper().startswith('PAGE'): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
 		elif line.strip().startswith(u'##'): return section(line), (fp, line)
 		else: continue
 def NOTES(c):	
 	fp, l = c
 	sys.stdout.write('NOTES\n')		
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		elif string.find(string.upper(line), 'NOTE') >= 0: return segment, (fp, line, 'NOTES', markups['NOTES'])
+		elif line.strip().upper().startswith('NOTE'): return segment, (fp, line, 'NOTES', markups['NOTES'])
 		elif line[:2] == '##': return section(line), (fp, line)
 		else: continue
 def segment(c):
@@ -57,7 +57,7 @@ def segment(c):
 			# transition: EOF - record entry
 			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return eof, (fp, line)
-		elif string.find(string.upper(line), m) >= 0: 
+		elif line.strip().upper().startswith(m):
 			# transition: new segment - record entry
 			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return segment, (fp, line, sect, mk)
@@ -84,8 +84,9 @@ def section(line):
 	if string.find(line, 'NOTES') >= 0: return NOTES
 	elif string.find(line, 'QUOTES') >= 0: return QUOTES
 	elif string.find(line, 'REFERENCE') >= 0: return parse
-	else: return error
+	else: return parse
 # todo - optimise this (i.e: id != only the last word)
 def ext_identifier(line):
 	b = string.rsplit(line)
 	return b[-1]
@@ -97,8 +98,11 @@ def ext_tags(line):
 def rec_segment(idf, tags, text, cnt, mk):
 	if not text:
-		print 'hmm... no quote on pp.', c
+		#sys.stderr.write('hmm... no quote on pp.' + idf)
 		return None
 	if text[0] == '>':
 		text = text[1:]
 	text = text.strip()
 	section_i, idf_i, tags_i, text_i, cnt_i = mk
 	entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
 	output[section_i].append(entry)