From e88d0d19731ccc9ff1089b32862a62bf4481ee3d Mon Sep 17 00:00:00 2001
From: gauthiier <d@gauthiier.info>
Date: Mon, 25 Aug 2014 17:57:34 +0200
Subject: [PATCH] new nnnotes and qqquotes identifiers

---
 parse/inject.py     | 38 +++++++++++++++++-------
 parse/parse-skim.sh |  2 +-
 parse/parse.py      | 70 +++++++++++++++++++++++++++++----------------
 3 files changed, 75 insertions(+), 35 deletions(-)

diff --git a/parse/inject.py b/parse/inject.py
index 5e54c46..035d5ab 100755
--- a/parse/inject.py
+++ b/parse/inject.py
@@ -7,6 +7,7 @@ fileout = ''
 fileref = ''
 notes = []
 quotes = []
+notes_cnt = 0
 
 def error(c):
 	fp, l = c
@@ -33,7 +34,7 @@ def QUOTES(c):
 		if not line: 
 			emit_remaining_quotes()
 			return eof, (fp, line)
-		elif string.find(string.upper(line), 'PAGE') >= 0: return process_quote, (fp, line)
+		elif is_quote_identifier(line): return process_quote, (fp, line)
 		elif line[:2] == '##': 
 			emit_remaining_quotes()
 			return section(line), (fp, line)
@@ -49,7 +50,7 @@ def NOTES(c):
 			emit_line('\n')
 			emit_remaining_notes()
 			return eof, (fp, line)
-		elif string.find(string.upper(line), 'NOTE') >= 0: return process_note, (fp, line)
+		elif is_note_identifier(line): return process_note, (fp, line)
 		elif line[:2] == '##': 
 			emit_line('\n')
 			emit_remaining_notes()			
@@ -60,7 +61,7 @@ def NOTES(c):
 
 def process_quote(c):
 	fp, l = c
-	ppnbr = int(string.rsplit(l)[-1])
+	ppnbr = int(extract_identifier(l))
 	for i in quotes.keys():
 		if int(i) < ppnbr:
 			emit_quotes(quotes[i])
@@ -68,7 +69,11 @@ def process_quote(c):
 	return QUOTES(c)
 
 def process_note(c):
+	global notes_cnt
 	fp, l = c
+	cnt = int(extract_identifier(l))
+	if(cnt > notes_cnt):
+		notes_cnt = cnt
 	emit_line(l)
 	return NOTES(c)
 
@@ -89,6 +94,18 @@ def section(line):
 	elif string.find(line, 'REFERENCE') >= 0: return parse
 	else: return parse
 
+def is_quote_identifier(line):
+	l = line.strip().upper()
+	return l.startswith("<!--") and l.find("PAGE") >= 0
+
+def is_note_identifier(line):
+	l = line.strip().upper()
+	return l.startswith("<!--") and l.find("NOTE") >= 0		
+
+def extract_identifier(line):
+	t = line.strip().replace('<!--', '').replace('-->', '')
+	return t.strip().rsplit()[-1]
+
 def emit_remaining_quotes():
 	rest = []
 	for i in quotes:
@@ -100,8 +117,8 @@ def emit_quotes(list):
 		emit_quote(list.pop())
 
 def emit_quote(data):
-	emit_line("page " + data['pp'] + "\n\n")
-	emit_line(">" + data['quote'] + "\n")
+	emit_line("<!--page " + data['pp'] + "-->\n\n")
+	emit_line(">\"" + data['quote'] + "\" pp." + data['pp'] + "\n")
 	emit_line('\n')
 
 def emit_remaining_notes():
@@ -112,9 +129,10 @@ def emit_remaining_notes():
 		emit_note(j)
 
 def emit_note(data):
-	emit_line("note: " + data['pp'] + "\n\n")
-	emit_line(data['note'] + "\n" )
-	emit_line('\n')
+	global notes_cnt
+	notes_cnt += 1
+	emit_line("<!--note " + str(notes_cnt) + "-->\n\n")
+	emit_line(str(notes_cnt) + ". " + data['note'] + "\n\n" )
 
 def emit_line(l):
 	#l = l.encode('utf-8')
@@ -164,8 +182,6 @@ if __name__ == '__main__':
 	data = json.load(fp2)
 	fp2.close()
 
-	print data
-
 	if not data['QUOTES'] and not data['NOTES']:
 		print "Document up-to-date."
 		fileout.close()
@@ -174,6 +190,8 @@ if __name__ == '__main__':
 	quotes = reoder(data['QUOTES'])
 	notes = reoder(data['NOTES'])
 
+	notes_cnt = 0
+
 	m = StateMachine();
 	m.add_state(parse)
 	m.add_state(NOTES)
diff --git a/parse/parse-skim.sh b/parse/parse-skim.sh
index d773de7..370a459 100755
--- a/parse/parse-skim.sh
+++ b/parse/parse-skim.sh
@@ -15,4 +15,4 @@ skimnotes get -format txt $f
 
 parse-skim.py < "$filename.txt"
 
-#rm $filename.txt
\ No newline at end of file
+rm -- "$filename.txt"  # quoted like the parse-skim.py redirect above so paths with spaces are not word-split
\ No newline at end of file
diff --git a/parse/parse.py b/parse/parse.py
index 227cf9f..df3793a 100755
--- a/parse/parse.py
+++ b/parse/parse.py
@@ -3,7 +3,19 @@
 from statemachine import StateMachine
 import sys, string, re, json
 
-markups = {'QUOTES' : ('PAGE', 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : ('NOTE', '#', 'tags', 'note', 'fpc')}
+def is_quote_identifier(line):
+	l = line.strip().upper()
+	return l.startswith("<!--") and l.find("PAGE") >= 0
+
+def is_note_identifier(line):
+	l = line.strip().upper()
+	return l.startswith("<!--") and l.find("NOTE") >= 0	
+
+def is_tag_identifier(line):
+	l = line.strip()
+	return l.startswith('<') and not l.startswith('<!')
+
+markups = {'QUOTES' : (is_quote_identifier, 'pp', 'tags', 'quote', 'fpc'), 'NOTES' : (is_note_identifier, '#', 'tags', 'note', 'fpc')}
 output = {'QUOTES' : [], 'NOTES' : []}
 
 
@@ -20,7 +32,7 @@ def parse(c):
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		if line[:2] == '##': return section(line), (fp, line)
+		if line.strip().startswith('##'): return section(line), (fp, line)
 		else: continue
 
 def QUOTES(c):
@@ -28,8 +40,8 @@ def QUOTES(c):
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		elif line.strip().upper().startswith('PAGE'): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
-		elif line.strip().startswith(u'##'): return section(line), (fp, line)
+		elif is_quote_identifier(line): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
+		elif line.strip().startswith('##'): return section(line), (fp, line)
 		else: continue
 
 def NOTES(c):	
@@ -37,8 +49,8 @@ def NOTES(c):
 	while 1:
 		line = fp.readline()
 		if not line: return eof, (fp, line)
-		elif line.strip().upper().startswith('NOTE'): return segment, (fp, line, 'NOTES', markups['NOTES'])
-		elif line[:2] == '##': return section(line), (fp, line)
+		elif is_note_identifier(line): return segment, (fp, line, 'NOTES', markups['NOTES'])
+		elif line.strip().startswith('##'): return section(line), (fp, line)
 		else: continue
 
 def segment(c):
@@ -49,25 +61,28 @@ def segment(c):
 	q = ''
 	cc = ''
 	# identifier
-	c = ext_identifier(l)
+	c = extract_identifier(l)
 	while 1:
 		cursor = fp.tell()
 		line = fp.readline()
+
+
 		if not line: 
 			# transition: EOF - record entry
-			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
+			record_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return eof, (fp, line)
-		elif line.strip().upper().startswith(m):
+
+		elif m(line):
 			# transition: new segment - record entry
-			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
+			record_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return segment, (fp, line, sect, mk)
-		elif line[:1] == '<': 
+		elif is_tag_identifier(line): 
 			# tags
-			t += ext_tags(line)
+			t += extract_tags(line)
 			continue
 		elif line[:2] == '##': 
 			# transition: new section - record entry
-			rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
+			record_segment(c, t, q, cc, (sect, x, tt, y, cnt))
 			return section(line), (fp, line)
 		elif line == '\n' :
 			continue
@@ -81,32 +96,39 @@ def segment(c):
 ## helper fncts
 def section(line):
 	line = string.upper(line)
-	if string.find(line, 'NOTES') >= 0: return NOTES
-	elif string.find(line, 'QUOTES') >= 0: return QUOTES
-	elif string.find(line, 'REFERENCE') >= 0: return parse
+	if line.find('NOTES') >= 0: return NOTES
+	elif line.find('QUOTES') >= 0: return QUOTES
+	elif line.find('REFERENCE') >= 0: return parse
 	else: return parse
 
 # todo - optimise this (i.e: id != only the last word)
-def ext_identifier(line):
-	b = string.rsplit(line)
-	return b[-1]
+def extract_identifier(line):
+	t = line.strip().replace('<!--', '').replace('-->', '')
+	return t.strip().rsplit()[-1]
 
-def ext_tags(line):
+def extract_tags(line):
 	line = line.rstrip('\n').replace(' ','')
 	t = re.split('<|>', line)
 	return [v for v in t if v]
 
-def rec_segment(idf, tags, text, cnt, mk):
+def record_segment(idf, tags, text, cnt, mk):
 	if not text:
 		#sys.stderr.write('hmm... no quote on pp.' + idf)
 		return None
-	if text[0] == '>':
-		text = text[1:]
-	text = text.strip()
+	text = escape_quote(text)
+	text = escape_note(text)
 	section_i, idf_i, tags_i, text_i, cnt_i = mk
 	entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
 	output[section_i].append(entry)
 
+def escape_quote(line):  # undo emit_quote's '>"..." pp.N' wrapping; text not starting with '>' is returned untouched
+	if(not line.strip().startswith('>')):
+		return line
+	l = re.sub('\"*\"', '', line.strip()[1:])  # drop the '>' marker, then every run of double quotes
+	return re.sub(r'pp\.[0-9]+', '', l)  # dot escaped: only a literal "pp.<digits>" page reference is removed
+
+def escape_note(line):  # strip the leading "N." list marker that inject.py's emit_note writes, then trim whitespace
+	return re.sub(r'^[0-9]+\.', '', line).strip()  # dot escaped so notes that merely start with digits are kept intact
 
 if __name__ == '__main__':
 	m = StateMachine();