From c3962aa5adf57beab31819b3be483c35a0cf184b Mon Sep 17 00:00:00 2001
From: gauthiier <d@gauthiier.info>
Date: Mon, 25 Aug 2014 12:54:27 +0200
Subject: [PATCH] inject from skim

---
 nnnew.sh            |   2 +
 parse/compare.py    |  74 +++++++++++++++++
 parse/compare.sh    |  16 ++++
 parse/inject.py     | 191 ++++++++++++++++++++++++++++++++++++++++++++
 parse/inject.sh     |  16 ++++
 parse/parse-skim.py |  10 +--
 parse/parse-skim.sh |  18 +++++
 parse/parse.py      |  20 +++--
 8 files changed, 334 insertions(+), 13 deletions(-)
 create mode 100755 parse/compare.py
 create mode 100755 parse/compare.sh
 create mode 100755 parse/inject.py
 create mode 100755 parse/inject.sh
 mode change 100644 => 100755 parse/parse-skim.py
 create mode 100755 parse/parse-skim.sh

diff --git a/nnnew.sh b/nnnew.sh
index 9bdbf98..3678500 100755
--- a/nnnew.sh
+++ b/nnnew.sh
@@ -63,6 +63,8 @@ cp -aR $WHERE/template/* $TITLE
 
 cd $TITLE
 
+touch notes.mmd
+
 # file exists? copy to output
 [ -z "$FILE" ] && echo "No file to copy. Make sure to reference in text in the note!" || curl -O $FILE 
 
diff --git a/parse/compare.py b/parse/compare.py
new file mode 100755
index 0000000..902eed3
--- /dev/null
+++ b/parse/compare.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+import sys, os, string, json
+
+fp1 = ''
+fp2 = ''
+
+def difference(data1, data2):
+	"""Return the entries of data2 that are missing from data1.
+
+	data1 and data2 are dicts holding a 'QUOTES' list (entries identified
+	by their 'quote' text) and a 'NOTES' list (entries identified by their
+	'note' text).  In __main__ data1 is the file given as first argument
+	and data2 the pdf annotation.
+
+	Only the one-way difference data2 - data1 is returned, not the
+	symmetric one: entries found in data1 alone are ignored.  When data2
+	repeats a text, its last entry is the one reported.  The result
+	follows the order of data2, so two runs give the same output.
+	"""
+	output = {'QUOTES' : [], 'NOTES' : []}
+
+	# the two sections only differ by the field identifying an entry
+	for section, field in (('QUOTES', 'quote'), ('NOTES', 'note')):
+
+		# texts already present in the reference
+		known = set(i[field] for i in data1[section])
+
+		# last entry for each text (s2 being pdf annotation)
+		latest = {}
+		for i in data2[section]:
+			latest[i[field]] = i
+
+		# new entries, in the order of data2
+		for i in data2[section]:
+			text = i[field]
+			if text in known:
+				continue
+			# flag the text so that a repeated entry is reported once
+			known.add(text)
+			output[section].append(latest[text])
+
+	return output
+
+def open_file(p):
+	# open p for reading; a file that cannot be opened aborts with a message
+	try: return open(p, 'rb')
+	except IOError as e: sys.exit('Cannot open file %s (%s)... Aborting.' % (p, e.strerror))
+
+if __name__ == '__main__':
+	if len(sys.argv) < 2:
+		sys.exit('No input file... Aborting.')
+	fp1 = open_file(sys.argv[1])
+	if len(sys.argv) < 3:
+		fp2 = sys.stdin
+	else:
+		fp2 = open_file(sys.argv[2])
+
+	data1 = json.load(fp1)
+	data2 = json.load(fp2)
+
+	# usage: compare.py <file1.json> [<file2.json>]
+	# file2 is read from stdin when it is not given on the command line.
+	# The output, written to stdout as json, holds the QUOTES and NOTES
+	# entries of file2 that are missing from file1 (see difference()).
+	# Both inputs are fully loaded above, so the files can be closed here.
+
+	fp1.close()
+	fp2.close()
+
+	data = difference(data1, data2)
+
+	json.dump(data, sys.stdout)
+
+
diff --git a/parse/compare.sh b/parse/compare.sh
new file mode 100755
index 0000000..773a62b
--- /dev/null
+++ b/parse/compare.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# print (json) the Skim pdf annotations that are missing from the index
+INDXFILE='.indx'
+
+for i in *.mmd; do
+    f=$i
+done
+
+# (re)build the index when it is missing or older than the markdown source
+if [[ ! -f $INDXFILE || $f -nt $INDXFILE ]]; then
+	rm -f "$INDXFILE"
+	make --quiet index
+fi
+
+parse-skim.sh | compare.py "$INDXFILE"
diff --git a/parse/inject.py b/parse/inject.py
new file mode 100755
index 0000000..5e54c46
--- /dev/null
+++ b/parse/inject.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+from statemachine import StateMachine
+import sys, os, string, json, shutil, codecs
+
+quote_nbr = sys.maxint  # not used anywhere else in this script
+fileout = ''  # replaced in __main__ by the output file that emit_line() writes to
+fileref = ''  # replaced in __main__ by the .bak copy the state machine reads from
+notes = []  # replaced in __main__ by reoder(data['NOTES'])
+quotes = []  # replaced in __main__ by reoder(data['QUOTES'])
+
+def error(c):  # registered in __main__ as an end state; c = (fp, line)
+	fp, l = c
+	sys.stderr.write('Unidentifiable line:\n'+ l)
+
+def eof(c):  # registered in __main__ as an end state; returned at end of input
+	print "eof"
+	return
+
+def parse(c):
+	# default state: copy the lines through, unchanged, until the next
+	# '##' heading hands over to the state chosen by section()
+	fp, l = c
+	for line in iter(fp.readline, ''):
+		if line[:2] == '##': return section(line), (fp, line)
+		emit_line(line)
+	# end of input
+	return eof, (fp, '')
+
+def QUOTES(c):
+	# copy the QUOTES section through; new quotes are flushed where it ends
+	fp, l = c
+	while 1:
+		line = fp.readline()
+		if not line:
+			emit_remaining_quotes()
+			return eof, (fp, line)
+		# a heading *starts* with 'page' (same rule as QUOTES() in parse.py)
+		elif line.strip().upper().startswith('PAGE'): return process_quote, (fp, line)
+		elif line[:2] == '##':
+			emit_remaining_quotes()
+			return section(line), (fp, line)
+		emit_line(line)
+
+def NOTES(c):
+	# copy the NOTES section through; the new notes are appended (after a
+	# blank line) where the section ends: next '##' heading or end of input
+	fp, l = c
+	while 1:
+		line = fp.readline()
+		if not line:
+			emit_line('\n')
+			emit_remaining_notes()
+			return eof, (fp, line)
+		if string.find(string.upper(line), 'NOTE') >= 0: return process_note, (fp, line)
+		if line[:2] == '##':
+			emit_line('\n')
+			emit_remaining_notes()
+			return section(line), (fp, line)
+		emit_line(line)
+
+def process_quote(c):  # c = (fp, 'page <n>' heading): inject the earlier pages first
+	fp, l = c
+	try: ppnbr = int(string.rsplit(l)[-1])
+	except (ValueError, IndexError): ppnbr = 0  # no page number: inject nothing here
+	for i in sorted((p for p in quotes if p.isdigit()), key=int):  # lowest page first
+		if int(i) < ppnbr: emit_quotes(quotes[i])
+	emit_line(l)
+	return QUOTES(c)
+
+def process_note(c):  # the line that NOTES() matched is copied through as it is
+	fp, l = c
+	emit_line(l)
+	return NOTES(c)
+
+
+####################
+
+def section(line):
+	# Echo the '##' heading and return the state that handles what follows.
+	# NOTES and QUOTES are only entered when there is something left to
+	# inject; any other section (REFERENCE included) goes through parse.
+	# NOTES is tested first: a heading naming both is handled as NOTES.
+	# (the tests are case-insensitive)
+	emit_line(line)
+	heading = string.upper(line)
+	if string.find(heading, 'NOTES') >= 0:
+		return NOTES if notes else parse
+	if string.find(heading, 'QUOTES') >= 0:
+		return QUOTES if quotes else parse
+	return parse
+
+def emit_remaining_quotes():
+	# flush every pending quote, lowest page first (non-numeric pages last);
+	# emit_quotes() drains the lists, so a second call injects nothing twice
+	for i in sorted(quotes, key=lambda p: int(p) if p.isdigit() else float('inf')):
+		emit_quotes(quotes[i])
+
+def emit_quotes(list):  # NB: drains `list` in place, emitting its last entry first
+	while list:
+		emit_quote(list.pop())
+
+def emit_quote(data):  # data: entry whose 'pp' and 'quote' are concatenated as strings
+	emit_line("page " + data['pp'] + "\n\n")
+	emit_line(">" + data['quote'] + "\n")
+	emit_line('\n')
+
+def emit_remaining_notes():
+	# flush every pending note, lowest page first (non-numeric pages last);
+	# pop() undoes the reversal done by reoder() and drains the lists
+	for i in sorted(notes, key=lambda p: int(p) if p.isdigit() else float('inf')):
+		while notes[i]:
+			emit_note(notes[i].pop())
+
+def emit_note(data):  # data: entry whose 'pp' and 'note' are concatenated as strings
+	emit_line("note: " + data['pp'] + "\n\n")
+	emit_line(data['note'] + "\n" )
+	emit_line('\n')
+
+def emit_line(l):
+	# fileout is opened through codecs as utf-8 in __main__ (open_fileoutput)
+	fileout.write(l)
+
+def reoder(q):
+	# Group the entries of q by page: {pp: [entries]}.
+	# NB: q is consumed from its end, so it is left empty and every page
+	# list is in reverse input order; emit_quotes() pops from the end of
+	# such a list, which gives the input order back.
+	grouped = {}
+	while q:
+		entry = q.pop(); grouped.setdefault(entry['pp'], []).append(entry)
+	return grouped
+
+def open_file(p):  # utf-8 reader on p; exits with a message when p does not exist
+	if not os.path.exists(p):
+		sys.exit('File %s does not exists... Aborting.' % p)
+	return codecs.open(p, 'rb', 'utf-8')
+
+def open_fileoutput(p):  # utf-8 stream on p opened 'r+': p must exist, it is not truncated
+	if not os.path.exists(p):
+		sys.exit('File %s does not exists... Aborting.' % p)
+	return codecs.open(p, 'r+', 'utf-8')
+
+def backupfile(p):  # copy p to p + '.bak' and return a utf-8 reader on the copy
+	if not os.path.exists(p):
+		sys.exit('File %s does not exists... Aborting.' % p)
+	bak = p + '.bak'
+	shutil.copy2(p, bak)  # an existing .bak is overwritten
+	return codecs.open(bak, 'r', 'utf-8')
+
+
+if __name__ == '__main__':
+	# usage: inject.py <file.mmd> [<data.json>]  (json is read from stdin when omitted)
+	if len(sys.argv) < 2:
+		sys.exit('No input file... Aborting.')
+	# the .mmd file is rewritten in place while its .bak copy is read back
+	fileref = backupfile(sys.argv[1])
+	fileout = open_fileoutput(sys.argv[1])
+	fileout.seek(0)
+	if len(sys.argv) < 3:
+		fp2 = sys.stdin
+	else:
+		fp2 = open_file(sys.argv[2])
+
+	# fp2 is the incoming (json) data to inject in the .mmd file
+	data = json.load(fp2)
+	fp2.close()
+
+	if not data['QUOTES'] and not data['NOTES']:
+		print "Document up-to-date."
+		fileout.close()
+		fileref.close()
+		sys.exit(0)
+
+	quotes = reoder(data['QUOTES'])
+	notes = reoder(data['NOTES'])
+
+	m = StateMachine()
+	m.add_state(parse)
+	m.add_state(NOTES)
+	m.add_state(QUOTES)
+	m.add_state(process_quote)
+	m.add_state(process_note)
+	m.add_state(error, end_state=1)
+	m.add_state(eof, end_state=1)
+	m.set_start(parse)
+	m.run((fileref, ''))
+
+	fileout.close()
+	fileref.close()
+
+
diff --git a/parse/inject.sh b/parse/inject.sh
new file mode 100755
index 0000000..a9afdc5
--- /dev/null
+++ b/parse/inject.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# inject the new Skim annotations into the markdown source, then re-index
+for i in *.mmd; do
+    f=$i
+done
+
+if [[ ! -f "$f" ]]; then  # an unmatched glob stays literal: '*.mmd'
+	echo "No mmd file (markdown source file) in directory... Aborting." >&2;
+	exit 1;
+fi
+
+# inject new nnnotes in source file
+compare.sh | inject.py "$f"
+
+# update index
+make --quiet index
diff --git a/parse/parse-skim.py b/parse/parse-skim.py
old mode 100644
new mode 100755
index be7ba92..58db9c1
--- a/parse/parse-skim.py
+++ b/parse/parse-skim.py
@@ -24,7 +24,7 @@ def highlight(c):
 	fp, l = c
 	p = page(l)
 	text = fp.readline()
-	output['QUOTES'].append({'pp' : p, 'quote' : text})
+	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
 	return parse(c)
 
 def anchored_note(c):
@@ -33,22 +33,22 @@ def anchored_note(c):
 	text = fp.readline()
 	fp.readline()
 	note = fp.readline()	
-	output['QUOTES'].append({'pp' : p, 'quote' : text})
-	output['NOTES'].append({'pp' : p, 'note' : note})
+	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
+	output['NOTES'].append({'pp' : p, 'note' : note.strip()})
 	return parse(c)
 
 def box(c):
 	fp, l = c
 	p = page(l)
 	text = fp.readline()	
-	output['QUOTES'].append({'pp' : p, 'quote' : text})
+	output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
 	return parse(c)
 
 def text_note(c):
 	fp, l = c
 	p = page(l)
 	text = fp.readline()
-	output['NOTES'].append({'pp' : p, 'note' : text})
+	output['NOTES'].append({'pp' : p, 'note' : text.strip()})
 	return parse(c)
 
 ## helper fncts
diff --git a/parse/parse-skim.sh b/parse/parse-skim.sh
new file mode 100755
index 0000000..d773de7
--- /dev/null
+++ b/parse/parse-skim.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+for i in *.pdf; do
+    f=$i
+done
+
+if [[ ! -f "$f" ]]; then  # an unmatched glob stays literal: '*.pdf'
+	echo "No pdf file in directory... Aborting." >&2;  # stdout is parsed as json
+	exit 1;
+fi
+
+filename="${f%.*}"
+
+skimnotes get -format txt "$f"
+
+parse-skim.py < "$filename.txt"
+
+#rm $filename.txt
\ No newline at end of file
diff --git a/parse/parse.py b/parse/parse.py
index 322a8fd..227cf9f 100755
--- a/parse/parse.py
+++ b/parse/parse.py
@@ -25,20 +25,20 @@ def parse(c):
 
 def QUOTES(c):
 	fp, l = c
-	sys.stdout.write('QUOTES\n')		
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		elif string.find(string.upper(line), 'PAGE') >= 0: return segment, (fp, line, 'QUOTES', markups['QUOTES'])
+		elif line.strip().upper().startswith('PAGE'): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
+		elif line.strip().startswith(u'##'): return section(line), (fp, line)
 		else: continue
 
-def NOTES(c):
+def NOTES(c):	
 	fp, l = c
-	sys.stdout.write('NOTES\n')		
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		elif string.find(string.upper(line), 'NOTE') >= 0: return segment, (fp, line, 'NOTES', markups['NOTES'])
+		elif line.strip().upper().startswith('NOTE'): return segment, (fp, line, 'NOTES', markups['NOTES'])
+		elif line[:2] == '##': return section(line), (fp, line)
 		else: continue
 
 def segment(c):
@@ -57,7 +57,7 @@ def segment(c):
 			# transition: EOF - record entry
 			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return eof, (fp, line)
-		elif string.find(string.upper(line), m) >= 0: 
+		elif line.strip().upper().startswith(m):
 			# transition: new segment - record entry
 			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return segment, (fp, line, sect, mk)
@@ -84,8 +84,9 @@ def section(line):
 	if string.find(line, 'NOTES') >= 0: return NOTES
 	elif string.find(line, 'QUOTES') >= 0: return QUOTES
 	elif string.find(line, 'REFERENCE') >= 0: return parse
-	else: return error
+	else: return parse
 
+# todo - optimise this (i.e: id != only the last word)
 def ext_identifier(line):
 	b = string.rsplit(line)
 	return b[-1]
@@ -97,8 +98,11 @@ def ext_tags(line):
 
 def rec_segment(idf, tags, text, cnt, mk):
 	if not text:
-		print 'hmm... no quote on pp.', c
+		#sys.stderr.write('hmm... no quote on pp.' + idf)
 		return None
+	if text[0] == '>':
+		text = text[1:]
+	text = text.strip()
 	section_i, idf_i, tags_i, text_i, cnt_i = mk
 	entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
 	output[section_i].append(entry)