nnnotes/parse/compare.py

97 lines
2.0 KiB
Python
Raw Normal View History

2014-08-25 12:54:27 +02:00
#!/usr/bin/env python
import sys, os, string, json
2014-09-01 10:35:11 +02:00
from operator import attrgetter
2014-08-25 12:54:27 +02:00
# Module-level placeholders for the two inputs; both are rebound to open
# file objects when the file is executed as a script.
fp1 = fp2 = ''
def difference(data1, data2):
    """Return the entries of data2 that do not occur in data1.

    Both arguments are dicts holding a 'QUOTES' list and a 'NOTES' list of
    dict entries.  Quote entries are matched on their 'quote' value and note
    entries on their 'note' value.

    The comparison is deliberately one-directional (not a symmetric
    difference): entries that exist only in data1 are ignored.  The original
    author's note describes data2 as "being pdf annotation".

    Returns a dict with 'QUOTES' and 'NOTES' lists.  Entries keep the order
    in which their key first appears in data2, so the result is reproducible
    from run to run.  When data2 repeats a key, one entry is returned for it
    and the last occurrence wins.

    Raises KeyError when a section or a per-entry key is missing.
    """
    return {
        'QUOTES': _new_entries(data1['QUOTES'], data2['QUOTES'], 'quote'),
        'NOTES': _new_entries(data1['NOTES'], data2['NOTES'], 'note'),
    }


def _new_entries(old_entries, new_entries, key):
    """Return the entries of new_entries whose entry[key] is not in old_entries."""
    seen = set(entry[key] for entry in old_entries)
    # Mirrors a plain dict build: a later duplicate replaces an earlier one.
    latest = dict((entry[key], entry) for entry in new_entries)
    fresh = []
    for entry in new_entries:
        value = entry[key]
        if value in seen:
            continue
        # Remember the key so a repeated one is emitted only once.
        seen.add(value)
        fresh.append(latest[value])
    return fresh
def open_file(p):
    """Open the file at path p for binary reading.

    Returns the open file object; the caller is responsible for closing it.
    Terminates the program through sys.exit (SystemExit) with an explanatory
    message when p does not exist.  Any other failure of open() propagates
    to the caller unchanged.
    """
    if not os.path.exists(p):
        sys.exit('File %s does not exist... Aborting.' % p)
    return open(p, 'rb')
if __name__ == '__main__':
    # Usage: compare.py FILE1 [FILE2]
    # Loads two JSON documents (FILE2 defaults to standard input), keeps the
    # entries of the second that are absent from the first, sorts them by
    # page number and writes the result as JSON to standard output.
    if len(sys.argv) < 2:
        sys.exit('No input file... Aborting.')

    # Only I/O failures are caught here: open_file() reports a missing file
    # through sys.exit itself, and that message must not be swallowed.
    try:
        fp1 = open_file(sys.argv[1])
    except (IOError, OSError):
        sys.exit("Can't open file " + sys.argv[1] + ". Aborting.")

    if len(sys.argv) < 3:
        fp2 = sys.stdin
    else:
        try:
            fp2 = open_file(sys.argv[2])
        except (IOError, OSError):
            fp1.close()
            sys.exit("Can't open file " + sys.argv[2] + ". Aborting.")

    # Pre-bind the raw text so the error path works even if read() fails.
    sdata = ''
    try:
        sdata = fp1.read()
        data1 = json.loads(sdata)
    except Exception:
        message = "<compare> Error loading data from " + sys.argv[1] + ". Aborting.\n"
        if sdata:
            # Files are opened in binary mode; make the payload printable.
            if not isinstance(sdata, str):
                sdata = sdata.decode('utf-8', 'replace')
            # Despite the label, this appends the raw input that failed to parse.
            message += "Traceback: " + sdata
        fp2.close()
        sys.exit(message)
    finally:
        fp1.close()

    sdata = ''
    try:
        sdata = fp2.read()
        data2 = json.loads(sdata)
    except Exception:
        message = "<compare> Error loading data. Aborting.\n"
        if sdata:
            if not isinstance(sdata, str):
                sdata = sdata.decode('utf-8', 'replace')
            message += "Traceback: " + sdata
        # fp1 was already closed by the previous finally clause.
        sys.exit(message)
    finally:
        fp2.close()

    data = difference(data1, data2)

    # Order both sections by page number; 'pp' must be convertible to int.
    data['QUOTES'] = sorted(data['QUOTES'], key=lambda entry: int(entry['pp']))
    data['NOTES'] = sorted(data['NOTES'], key=lambda entry: int(entry['pp']))

    json.dump(data, sys.stdout)