inject from skim
This commit is contained in:
parent
a6e704c3f8
commit
c3962aa5ad
2
nnnew.sh
2
nnnew.sh
@ -63,6 +63,8 @@ cp -aR $WHERE/template/* $TITLE
|
|||||||
|
|
||||||
cd $TITLE
|
cd $TITLE
|
||||||
|
|
||||||
|
touch notes.mmd
|
||||||
|
|
||||||
# file exists? copy to output
|
# file exists? copy to output
|
||||||
[ -z "$FILE" ] && echo "No file to copy. Make sure to reference in text in the note!" || curl -O $FILE
|
[ -z "$FILE" ] && echo "No file to copy. Make sure to reference in text in the note!" || curl -O $FILE
|
||||||
|
|
||||||
|
|||||||
74
parse/compare.py
Executable file
74
parse/compare.py
Executable file
@ -0,0 +1,74 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import sys, os, string, json
|
||||||
|
|
||||||
|
fp1 = ''
|
||||||
|
fp2 = ''
|
||||||
|
|
||||||
|
def difference(data1, data2):
    """Return the entries of ``data2`` that are missing from ``data1``.

    Both arguments are dicts holding a ``'QUOTES'`` list (items compared by
    their ``'quote'`` text) and a ``'NOTES'`` list (items compared by their
    ``'note'`` text).  ``data2`` is the pdf annotation side, so the result is
    the one-way difference ``data2 - data1`` -- deliberately not the
    symmetric difference.

    Entries come back in the order their text first appears in ``data2``.
    When several ``data2`` entries share the same text, only the last one is
    kept.  A section missing from either input is treated as empty.

    Returns a dict ``{'QUOTES': [...], 'NOTES': [...]}``.
    """
    def _missing(section, key):
        # Texts already present on the data1 side.
        known = set(item[key] for item in data1.get(section, ()))
        latest = {}   # text -> last data2 entry carrying that text
        order = []    # texts in order of first appearance in data2
        for item in data2.get(section, ()):
            text = item[key]
            if text in known:
                continue
            if text not in latest:
                order.append(text)
            latest[text] = item
        return [latest[text] for text in order]

    return {'QUOTES': _missing('QUOTES', 'quote'),
            'NOTES': _missing('NOTES', 'note')}
|
||||||
|
|
||||||
|
def open_file(p):
    """Open ``p`` for binary reading, or exit the program with a message.

    Exits via ``sys.exit`` when the path does not exist, and also when it
    exists but cannot be opened (a directory, missing permissions, ...), so
    the command line user never sees a raw traceback.
    """
    if not os.path.exists(p):
        sys.exit('File %s does not exists... Aborting.' % p)
    try:
        return open(p, 'rb')
    except EnvironmentError as err:
        # EnvironmentError covers IOError/OSError on both Python 2 and 3.
        sys.exit('Cannot open %s (%s)... Aborting.' % (p, err.strerror))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Usage: compare.py FILE1 [FILE2]
    # FILE2 defaults to stdin; the JSON difference FILE2 - FILE1 is written
    # to stdout.
    if len(sys.argv) < 2:
        sys.exit('No input file... Aborting.')

    fp1 = open_file(sys.argv[1])
    try:
        fp2 = sys.stdin if len(sys.argv) < 3 else open_file(sys.argv[2])
        try:
            data1 = json.load(fp1)
            data2 = json.load(fp2)
        except ValueError as err:
            # json raises ValueError (or a subclass) on malformed or empty input.
            sys.exit('Invalid JSON input (%s)... Aborting.' % err)
        finally:
            fp2.close()
    finally:
        fp1.close()

    data = difference(data1, data2)

    json.dump(data, sys.stdout)
|
||||||
|
|
||||||
|
|
||||||
16
parse/compare.sh
Executable file
16
parse/compare.sh
Executable file
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Print (as JSON, via compare.py) the Skim annotations that are not yet in
# the index of the markdown source found in the current directory.

INDXFILE='.indx'

# Use the last *.mmd file in glob order.  Start from an empty value so an
# inherited $f is ignored, and skip the literal '*.mmd' that bash leaves
# behind when nothing matches.
f=
for i in *.mmd; do
    [[ -e $i ]] && f=$i
done

# (Re)build the index when it is missing or older than the markdown source.
if [[ ! -f $INDXFILE ]]; then
    make --quiet index
elif [[ -n $f && $f -nt $INDXFILE ]]; then
    rm -- "$INDXFILE"
    make --quiet index
fi

parse-skim.sh | compare.py "$INDXFILE"
|
||||||
191
parse/inject.py
Executable file
191
parse/inject.py
Executable file
@ -0,0 +1,191 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from statemachine import StateMachine
|
||||||
|
import sys, os, string, json, shutil, codecs
|
||||||
|
|
||||||
|
quote_nbr = sys.maxint
|
||||||
|
fileout = ''
|
||||||
|
fileref = ''
|
||||||
|
notes = []
|
||||||
|
quotes = []
|
||||||
|
|
||||||
|
def error(c):
    """End state: report the line the state machine could not classify.

    ``c`` is the ``(file, line)`` cargo pair; the line is written to stderr
    after a fixed header.
    """
    _, offending = c
    message = 'Unidentifiable line:\n' + offending
    sys.stderr.write(message)
|
||||||
|
|
||||||
|
def eof(c):
    """End state reached at end of input: announce it on stdout."""
    sys.stdout.write("eof\n")
    return None
|
||||||
|
|
||||||
|
def parse(c):
    """Default state: copy lines through until a ``##`` heading or EOF.

    ``c`` is the ``(file, line)`` cargo pair; only the file is used.
    Returns ``(next_state, cargo)``: ``eof`` at end of input, otherwise the
    state chosen by ``section()`` for the heading just read.
    """
    fp, _ = c
    while True:
        line = fp.readline()
        if not line:
            return eof, (fp, line)
        if line.startswith('##'):
            return section(line), (fp, line)
        # Ordinary content: pass it through unchanged.
        emit_line(line)
|
||||||
|
|
||||||
|
def QUOTES(c):
    """State for the quotes section of the document being rewritten.

    Copies lines through until one of:
      * end of input  -> emit the quotes still pending, go to ``eof``;
      * a page marker -> hand the line to ``process_quote``;
      * a ``##`` heading -> emit the quotes still pending, then switch to
        the state chosen by ``section()``.

    A page marker is a line that *starts* with "page" (case-insensitive,
    leading whitespace ignored).  A plain substring test would also fire on
    quote text that merely mentions the word, and ``process_quote`` would
    then try to parse a page number out of prose.

    ``c`` is the ``(file, line)`` cargo pair; returns ``(next_state, cargo)``.
    """
    fp, _ = c
    while True:
        line = fp.readline()
        if not line:
            emit_remaining_quotes()
            return eof, (fp, line)
        if line.strip().upper().startswith('PAGE'):
            return process_quote, (fp, line)
        if line.startswith('##'):
            emit_remaining_quotes()
            return section(line), (fp, line)
        emit_line(line)
|
||||||
|
|
||||||
|
def NOTES(c):
    """State for the notes section of the document being rewritten.

    Copies lines through until one of:
      * end of input  -> emit a blank line and the pending notes, go to
        ``eof``;
      * a note marker -> hand the line to ``process_note``;
      * a ``##`` heading -> emit a blank line and the pending notes, then
        switch to the state chosen by ``section()``.

    A note marker is a line that *starts* with "note" (case-insensitive,
    leading whitespace ignored).  A plain substring test would also match a
    following heading such as "## Footnotes", so the section change -- and
    the injection of the pending notes before it -- would be missed.

    ``c`` is the ``(file, line)`` cargo pair; returns ``(next_state, cargo)``.
    """
    fp, _ = c
    while True:
        line = fp.readline()
        if not line:
            emit_line('\n')
            emit_remaining_notes()
            return eof, (fp, line)
        if line.strip().upper().startswith('NOTE'):
            return process_note, (fp, line)
        if line.startswith('##'):
            emit_line('\n')
            emit_remaining_notes()
            return section(line), (fp, line)
        emit_line(line)
|
||||||
|
|
||||||
|
def process_quote(c):
    """Handle a page marker line found in the quotes section.

    The last word of the marker is read as the page number.  Every pending
    quote (module-level ``quotes``, keyed by page) whose page is lower is
    emitted first, in ascending page order, then the marker itself is
    copied through and processing continues in ``QUOTES``.

    If the marker carries no usable number, nothing is injected here; the
    line is copied through and the pending quotes stay queued for a later
    marker or for the end of the section.  Pending pages that are not
    numeric are likewise left for the end of the section.

    ``c`` is the ``(file, line)`` cargo pair; returns what ``QUOTES`` returns.
    """
    fp, l = c

    def _page(value):
        # Page number as int, or None when it cannot be parsed.
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    words = l.split()
    ppnbr = _page(words[-1]) if words else None
    if ppnbr is not None:
        earlier = []
        for key in quotes:
            nbr = _page(key)
            if nbr is not None and nbr < ppnbr:
                earlier.append((nbr, key))
        for _, key in sorted(earlier, key=lambda pair: pair[0]):
            # emit_quotes() drains the list, so nothing is emitted twice.
            emit_quotes(quotes[key])
    emit_line(l)
    return QUOTES(c)
|
||||||
|
|
||||||
|
def process_note(c):
    """Copy a note marker line through and resume the ``NOTES`` state.

    ``c`` is the ``(file, line)`` cargo pair; returns what ``NOTES`` returns.
    """
    _, marker = c
    emit_line(marker)
    return NOTES(c)
|
||||||
|
|
||||||
|
|
||||||
|
####################
|
||||||
|
|
||||||
|
def section(line):
    """Copy a ``##`` heading through and pick the state for its section.

    The heading is matched case-insensitively: one containing "NOTES"
    selects ``NOTES``, otherwise one containing "QUOTES" selects ``QUOTES``.
    In both cases ``parse`` is returned instead when the corresponding
    module-level collection (``notes`` / ``quotes``) is empty.  Any other
    heading (including the reference section) selects ``parse``.
    """
    emit_line(line)
    heading = line.upper()
    if 'NOTES' in heading:
        return NOTES if notes else parse
    if 'QUOTES' in heading:
        return QUOTES if quotes else parse
    return parse
|
||||||
|
|
||||||
|
def emit_remaining_quotes():
    """Emit every quote still pending in the module-level ``quotes``.

    Pages are emitted in ascending numeric order (pages that are not numeric
    come last).  Each page's own list is handed to ``emit_quotes()``, which
    drains it, so a later call -- for instance when the document has a
    second quotes section -- does not emit the same quotes again.
    """
    def _page_order(pp):
        # Sort key: numeric pages first, by value; anything else after, by text.
        try:
            return (0, int(pp), '')
        except (TypeError, ValueError):
            return (1, 0, '%s' % (pp,))

    for pp in sorted(quotes, key=_page_order):
        emit_quotes(quotes[pp])
|
||||||
|
|
||||||
|
def emit_quotes(list):
    """Emit the given quotes, last element first, emptying the list.

    The argument is consumed in place: when this returns, the caller's list
    is empty.
    """
    pending = list
    while pending:
        latest = pending.pop()
        emit_quote(latest)
|
||||||
|
|
||||||
|
def emit_quote(data):
    """Write one quote entry: a "page N" marker, a blank line, the quote as
    a ``>`` block-quote line, and a trailing blank line.

    ``data`` must provide ``'pp'`` and ``'quote'``.  The page is formatted
    with ``%s`` so a numeric page does not break the output.
    (NOTE(review): the page type depends on the producer of the JSON --
    confirm.)
    """
    emit_line("page %s\n\n" % (data['pp'],))
    emit_line(">" + data['quote'] + "\n")
    emit_line('\n')
|
||||||
|
|
||||||
|
def emit_remaining_notes():
    """Emit every note still pending in the module-level ``notes``.

    Pages are emitted in ascending numeric order (pages that are not numeric
    come last).  Within a page the list is consumed from its tail, because
    ``reoder()`` stores each page's entries in reverse input order; popping
    restores the input order.  The lists are drained, so a later call does
    not emit the same notes again.
    """
    def _page_order(pp):
        # Sort key: numeric pages first, by value; anything else after, by text.
        try:
            return (0, int(pp), '')
        except (TypeError, ValueError):
            return (1, 0, '%s' % (pp,))

    for pp in sorted(notes, key=_page_order):
        pending = notes[pp]
        while pending:
            emit_note(pending.pop())
|
||||||
|
|
||||||
|
def emit_note(data):
    """Write one note entry: a "note: N" marker, a blank line, the note text
    and a trailing blank line.

    ``data`` must provide ``'pp'`` and ``'note'``.  The page is formatted
    with ``%s`` so a numeric page does not break the output.
    (NOTE(review): the page type depends on the producer of the JSON --
    confirm.)
    """
    emit_line("note: %s\n\n" % (data['pp'],))
    emit_line(data['note'] + "\n")
    emit_line('\n')
|
||||||
|
|
||||||
|
def emit_line(l):
    """Write ``l`` verbatim to the module-level output file ``fileout``."""
    sink = fileout
    sink.write(l)
|
||||||
|
|
||||||
|
def reoder(q):
    """Group the entries of ``q`` by their ``'pp'`` (page) value.

    ``q`` is consumed from its tail and is empty when this returns.  Because
    of that, each page's list holds its entries in reverse input order.

    Returns a dict mapping page -> list of entries.
    """
    out = {}
    while q:
        item = q.pop()
        # setdefault avoids rebuilding the key list on every iteration.
        out.setdefault(item['pp'], []).append(item)
    return out
|
||||||
|
|
||||||
|
def open_file(p):
    """Open ``p`` for reading as UTF-8 text, or exit with a message.

    Exits via ``sys.exit`` when the path does not exist, and also when it
    exists but cannot be opened (a directory, missing permissions, ...).
    """
    if not os.path.exists(p):
        sys.exit('File %s does not exists... Aborting.' % p)
    try:
        return codecs.open(p, 'rb', 'utf-8')
    except EnvironmentError as err:
        # EnvironmentError covers IOError/OSError on both Python 2 and 3.
        sys.exit('Cannot open %s (%s)... Aborting.' % (p, err.strerror))
|
||||||
|
|
||||||
|
def open_fileoutput(p):
    """Open the existing file ``p`` for in-place update (``r+``) as UTF-8.

    The file is not truncated.  Exits via ``sys.exit`` when the path does
    not exist, and also when it exists but cannot be opened for update.
    """
    if not os.path.exists(p):
        sys.exit('File %s does not exists... Aborting.' % p)
    try:
        return codecs.open(p, 'r+', 'utf-8')
    except EnvironmentError as err:
        # EnvironmentError covers IOError/OSError on both Python 2 and 3.
        sys.exit('Cannot open %s (%s)... Aborting.' % (p, err.strerror))
|
||||||
|
|
||||||
|
def backupfile(p):
    """Copy ``p`` to ``p + '.bak'`` and return the copy opened for reading.

    The copy is opened as UTF-8 text.  Exits via ``sys.exit`` when ``p``
    does not exist, and also when the copy cannot be made or opened.
    """
    if not os.path.exists(p):
        sys.exit('File %s does not exists... Aborting.' % p)
    bak = p + '.bak'
    try:
        shutil.copy2(p, bak)
        return codecs.open(bak, 'r', 'utf-8')
    except EnvironmentError as err:
        # EnvironmentError covers IOError/OSError on both Python 2 and 3.
        sys.exit('Cannot back up %s (%s)... Aborting.' % (p, err.strerror))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Usage: inject.py FILE.mmd [DATA.json]
    # DATA defaults to stdin.  FILE.mmd is backed up to FILE.mmd.bak, then
    # rewritten in place from the backup with the new quotes and notes
    # injected.
    if len(sys.argv) < 2:
        sys.exit('No input file... Aborting.')

    # fileref: the backup copy we read from; fileout: the .mmd file we rewrite.
    fileref = backupfile(sys.argv[1])
    fileout = open_fileoutput(sys.argv[1])
    try:
        fileout.seek(0)

        # fp2 is the incoming (json) data to inject.
        fp2 = sys.stdin if len(sys.argv) < 3 else open_file(sys.argv[2])
        try:
            data = json.load(fp2)
        except ValueError as err:
            # json raises ValueError (or a subclass) on malformed or empty input.
            sys.exit('Invalid JSON input (%s)... Aborting.' % err)
        finally:
            fp2.close()

        # NOTE(review): dumps the whole payload on stdout -- looks like
        # leftover debugging output; kept to preserve behaviour.
        sys.stdout.write('%s\n' % (data,))

        if not data['QUOTES'] and not data['NOTES']:
            sys.stdout.write("Document up-to-date.\n")
            sys.exit(0)

        quotes = reoder(data['QUOTES'])
        notes = reoder(data['NOTES'])

        m = StateMachine()
        m.add_state(parse)
        m.add_state(NOTES)
        m.add_state(QUOTES)
        m.add_state(process_quote)
        m.add_state(process_note)
        m.add_state(error, end_state=1)
        m.add_state(eof, end_state=1)
        m.set_start(parse)
        m.run((fileref, ''))
    finally:
        # Runs on every exit path, including the sys.exit() calls above.
        fileout.close()
        fileref.close()
|
||||||
|
|
||||||
|
|
||||||
16
parse/inject.sh
Executable file
16
parse/inject.sh
Executable file
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Inject the Skim annotations that are not yet in the markdown source of
# the current directory, then refresh the index.

# Use the last *.mmd file in glob order.  Start from an empty value so an
# inherited $f is ignored, and skip the literal '*.mmd' that bash leaves
# behind when nothing matches.
f=
for i in *.mmd; do
    [[ -e $i ]] && f=$i
done

# Double quotes are required here: '$f' in single quotes is a literal,
# never-empty string, so the check could not fire.
if [[ -z "$f" ]]; then
    echo "No mmd file (markdown source file) in directory... Aborting." >&2
    exit 1
fi

# inject new nnnotes in source file
compare.sh | inject.py "$f"

# update index
make --quiet index
|
||||||
10
parse/parse-skim.py
Normal file → Executable file
10
parse/parse-skim.py
Normal file → Executable file
@ -24,7 +24,7 @@ def highlight(c):
|
|||||||
fp, l = c
|
fp, l = c
|
||||||
p = page(l)
|
p = page(l)
|
||||||
text = fp.readline()
|
text = fp.readline()
|
||||||
output['QUOTES'].append({'pp' : p, 'quote' : text})
|
output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
|
||||||
return parse(c)
|
return parse(c)
|
||||||
|
|
||||||
def anchored_note(c):
|
def anchored_note(c):
|
||||||
@ -33,22 +33,22 @@ def anchored_note(c):
|
|||||||
text = fp.readline()
|
text = fp.readline()
|
||||||
fp.readline()
|
fp.readline()
|
||||||
note = fp.readline()
|
note = fp.readline()
|
||||||
output['QUOTES'].append({'pp' : p, 'quote' : text})
|
output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
|
||||||
output['NOTES'].append({'pp' : p, 'note' : note})
|
output['NOTES'].append({'pp' : p, 'note' : note.strip()})
|
||||||
return parse(c)
|
return parse(c)
|
||||||
|
|
||||||
def box(c):
|
def box(c):
|
||||||
fp, l = c
|
fp, l = c
|
||||||
p = page(l)
|
p = page(l)
|
||||||
text = fp.readline()
|
text = fp.readline()
|
||||||
output['QUOTES'].append({'pp' : p, 'quote' : text})
|
output['QUOTES'].append({'pp' : p, 'quote' : text.strip()})
|
||||||
return parse(c)
|
return parse(c)
|
||||||
|
|
||||||
def text_note(c):
|
def text_note(c):
|
||||||
fp, l = c
|
fp, l = c
|
||||||
p = page(l)
|
p = page(l)
|
||||||
text = fp.readline()
|
text = fp.readline()
|
||||||
output['NOTES'].append({'pp' : p, 'note' : text})
|
output['NOTES'].append({'pp' : p, 'note' : text.strip()})
|
||||||
return parse(c)
|
return parse(c)
|
||||||
|
|
||||||
## helper fncts
|
## helper fncts
|
||||||
|
|||||||
18
parse/parse-skim.sh
Executable file
18
parse/parse-skim.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
# Export the Skim notes of the pdf in the current directory as text and
# convert them to JSON on stdout with parse-skim.py.

# Use the last *.pdf file in glob order.  Start from an empty value so an
# inherited $f is ignored, and skip the literal '*.pdf' that bash leaves
# behind when nothing matches.
f=
for i in *.pdf; do
    [[ -e $i ]] && f=$i
done

# Double quotes are required here: '$f' in single quotes is a literal,
# never-empty string.  The message goes to stderr because stdout is the
# JSON stream read by the next script in the pipeline.
if [[ -z "$f" ]]; then
    echo "No pdf file in directory... Aborting." >&2
    exit 1
fi

filename="${f%.*}"

skimnotes get -format txt "$f"

parse-skim.py < "$filename.txt"

# NOTE: the exported "$filename.txt" is left in place (cleanup is disabled).
#rm "$filename.txt"
|
||||||
@ -25,20 +25,20 @@ def parse(c):
|
|||||||
|
|
||||||
def QUOTES(c):
|
def QUOTES(c):
|
||||||
fp, l = c
|
fp, l = c
|
||||||
sys.stdout.write('QUOTES\n')
|
|
||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if not line: return eof, (fp, line)
|
if not line: return eof, (fp, line)
|
||||||
elif string.find(string.upper(line), 'PAGE') >= 0: return segment, (fp, line, 'QUOTES', markups['QUOTES'])
|
elif line.strip().upper().startswith('PAGE'): return segment, (fp, line, 'QUOTES', markups['QUOTES'])
|
||||||
|
elif line.strip().startswith(u'##'): return section(line), (fp, line)
|
||||||
else: continue
|
else: continue
|
||||||
|
|
||||||
def NOTES(c):
|
def NOTES(c):
|
||||||
fp, l = c
|
fp, l = c
|
||||||
sys.stdout.write('NOTES\n')
|
|
||||||
while 1:
|
while 1:
|
||||||
line = fp.readline()
|
line = fp.readline()
|
||||||
if not line: return eof, (fp, line)
|
if not line: return eof, (fp, line)
|
||||||
elif string.find(string.upper(line), 'NOTE') >= 0: return segment, (fp, line, 'NOTES', markups['NOTES'])
|
elif line.strip().upper().startswith('NOTE'): return segment, (fp, line, 'NOTES', markups['NOTES'])
|
||||||
|
elif line[:2] == '##': return section(line), (fp, line)
|
||||||
else: continue
|
else: continue
|
||||||
|
|
||||||
def segment(c):
|
def segment(c):
|
||||||
@ -57,7 +57,7 @@ def segment(c):
|
|||||||
# transition: EOF - record entry
|
# transition: EOF - record entry
|
||||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||||
return eof, (fp, line)
|
return eof, (fp, line)
|
||||||
elif string.find(string.upper(line), m) >= 0:
|
elif line.strip().upper().startswith(m):
|
||||||
# transition: new segment - record entry
|
# transition: new segment - record entry
|
||||||
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
rec_segment(c, t, q, cc, (sect, x, tt, y, cnt))
|
||||||
return segment, (fp, line, sect, mk)
|
return segment, (fp, line, sect, mk)
|
||||||
@ -84,8 +84,9 @@ def section(line):
|
|||||||
if string.find(line, 'NOTES') >= 0: return NOTES
|
if string.find(line, 'NOTES') >= 0: return NOTES
|
||||||
elif string.find(line, 'QUOTES') >= 0: return QUOTES
|
elif string.find(line, 'QUOTES') >= 0: return QUOTES
|
||||||
elif string.find(line, 'REFERENCE') >= 0: return parse
|
elif string.find(line, 'REFERENCE') >= 0: return parse
|
||||||
else: return error
|
else: return parse
|
||||||
|
|
||||||
|
# todo - optimise this (i.e: id != only the last word)
|
||||||
def ext_identifier(line):
|
def ext_identifier(line):
|
||||||
b = string.rsplit(line)
|
b = string.rsplit(line)
|
||||||
return b[-1]
|
return b[-1]
|
||||||
@ -97,8 +98,11 @@ def ext_tags(line):
|
|||||||
|
|
||||||
def rec_segment(idf, tags, text, cnt, mk):
|
def rec_segment(idf, tags, text, cnt, mk):
|
||||||
if not text:
|
if not text:
|
||||||
print 'hmm... no quote on pp.', c
|
#sys.stderr.write('hmm... no quote on pp.' + idf)
|
||||||
return None
|
return None
|
||||||
|
if text[0] == '>':
|
||||||
|
text = text[1:]
|
||||||
|
text = text.strip()
|
||||||
section_i, idf_i, tags_i, text_i, cnt_i = mk
|
section_i, idf_i, tags_i, text_i, cnt_i = mk
|
||||||
entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
|
entry = {idf_i : idf, text_i : text, tags_i : tags, cnt_i : cnt}
|
||||||
output[section_i].append(entry)
|
output[section_i].append(entry)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user