index(es)

This commit is contained in:
parent cabfe50777
commit afc71795d1

index.py · 75 lines · Normal file
@@ -0,0 +1,75 @@
import argparse, os, sys, json, logging

from keywords import rake
from lxml import etree as et

import yake

logging.basicConfig(level=logging.DEBUG)


def index(f):
    """Build a keyword -> [mail number] index for one chapter XML file."""
    if not os.path.isfile(f):
        logging.error(f + " is not a valid file.")
        return None

    # filename should be of the type: N.xxxx.xml
    # ex: 3.Network.xml
    ch = os.path.basename(f).split('.')[0]

    indx = {}

    r = rake.Rake('keywords/FoxStoplist.txt', max_words_length=3, min_keyword_frequency=1)
    y = yake.KeywordExtractor(lan="en", top=40)

    root = et.parse(f).getroot()

    for m in root.findall('mails/mail'):
        nbr_str = m.find('nbr').text
        content = m.find('content').text

        # format nbr: prefix the mail number with the chapter number
        nbr_str = ch + '.' + nbr_str

        # yake
        try:
            kwy = y.extract_keywords(content)
            for k in kwy:
                kw = k[0]
                if kw not in indx:
                    indx[kw] = []
                indx[kw].append(nbr_str)
        except Exception as e:
            print(e)

        # rake: keep only candidates scoring above 4.0
        try:
            kwr = r.run(content)
            kwr = [x for x in kwr if x[1] > 4.0]
            for k in kwr:
                kw = k[0]
                if kw not in indx:
                    indx[kw] = []
                indx[kw].append(nbr_str)
        except Exception as e:
            print(e)

    return indx


if __name__ == '__main__':

    p = argparse.ArgumentParser(description='Builds an index of emails')
    p.add_argument('file', metavar="f", help="xml file to index")

    args = p.parse_args()

    ind = index(args.file)
    ind_out = {'selected': {}, 'orphan': ind}

    print(json.dumps(ind_out, indent=4, sort_keys=True, ensure_ascii=False))
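For reference, index() only touches three element names: it iterates root.findall('mails/mail') and reads the nbr and content children of each mail. A minimal sketch of a matching input file (the root tag and sample values are hypothetical, only the element names come from the code above):

import lxml.etree as et

root = et.Element('chapter')
mails = et.SubElement(root, 'mails')
mail = et.SubElement(mails, 'mail')
et.SubElement(mail, 'nbr').text = '0001'
et.SubElement(mail, 'content').text = 'Subject: welcome to the list ...'
et.ElementTree(root).write('3.Network.xml', pretty_print=True)

Running python index.py 3.Network.xml then prints the {'selected': {}, 'orphan': {...}} JSON to stdout, which index.sh below redirects into the index/*.js files.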
index.sh · 34 lines · Executable file
@@ -0,0 +1,34 @@
#!/bin/sh

echo "1.Welcome.xml -> 1.Welcome-index.js"
python index.py xml/1.Welcome.xml > index/1.Welcome-index.js
echo "2.DeepEurope.xml -> 2.DeepEurope-index.js"
python index.py xml/2.DeepEurope.xml > index/2.DeepEurope-index.js
echo "3.Network.xml -> 3.Network-index.js"
python index.py xml/3.Network.xml > index/3.Network-index.js
echo "4.net.art.xml -> 4.net.art-index.js"
python index.py xml/4.net.art.xml > index/4.net.art-index.js
echo "5.Netzkritik.xml -> 5.Netzkritik-index.js"
python index.py xml/5.Netzkritik.xml > index/5.Netzkritik-index.js
echo "6.FLOSS.xml -> 6.FLOSS-index.js"
python index.py xml/6.FLOSS.xml > index/6.FLOSS-index.js
echo "7.Critique_Art_Politics.xml -> 7.Critique_Art_Politics-index.js"
python index.py xml/7.Critique_Art_Politics.xml > index/7.Critique_Art_Politics-index.js
echo "8.TacticalMedia.xml -> 8.TacticalMedia-index.js"
python index.py xml/8.TacticalMedia.xml > index/8.TacticalMedia-index.js
echo "9.List_talking_to_List.xml -> 9.List_talking_to_List-index.js"
python index.py xml/9.List_talking_to_List.xml > index/9.List_talking_to_List-index.js
echo "10.Cyberfeminism.xml -> 10.Cyberfeminism-index.js"
python index.py xml/10.Cyberfeminism.xml > index/10.Cyberfeminism-index.js
echo "11.CODE.xml -> 11.CODE-index.js"
python index.py xml/11.CODE.xml > index/11.CODE-index.js
echo "13.Post-digital.xml -> 13.Post-digital-index.js"
python index.py xml/13.Post-digital.xml > index/13.Post-digital-index.js
echo "14.MANIFESTO.xml -> 14.MANIFESTO-index.js"
python index.py xml/14.MANIFESTO.xml > index/14.MANIFESTO-index.js
echo "15.LutherBlissett.xml -> 15.LutherBlissett-index.js"
python index.py xml/15.LutherBlissett.xml > index/15.LutherBlissett-index.js
echo "16.NN.xml -> 16.NN-index.js"
python index.py xml/16.NN.xml > index/16.NN-index.js
echo "17.Interviews.xml -> 17.Interviews-index.js"
python index.py xml/17.Interviews.xml > index/17.Interviews-index.js
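The same pipeline can be driven from Python instead of repeating one line per chapter; a minimal sketch, assuming index.py is on the import path (rebuild_indexes.py is an editor's illustration, not a file in this commit):

# rebuild_indexes.py -- hypothetical helper, not part of this commit
import glob, json, os
from index import index  # index.py's argparse code runs only under __main__, so importing is safe

for f in sorted(glob.glob('xml/*.xml')):
    out = os.path.join('index', os.path.basename(f)[:-len('.xml')] + '-index.js')
    print(os.path.basename(f), '->', os.path.basename(out))
    ind = index(f)
    with open(out, 'w') as fo:
        json.dump({'selected': {}, 'orphan': ind}, fo, indent=4, sort_keys=True, ensure_ascii=False)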
index/1.Welcome-index.js · 1039 lines · Normal file (diff suppressed because it is too large)
index/10.Cyberfeminism-index.js · 5072 lines · Normal file (diff suppressed because it is too large)
index/11.CODE-index.js · 5902 lines · Normal file (diff suppressed because it is too large)
index/13.Post-digital-index.js · 8037 lines · Normal file (diff suppressed because it is too large)
index/14.MANIFESTO-index.js · 7353 lines · Normal file (diff suppressed because it is too large)
index/15.LutherBlissett-index.js · 6879 lines · Normal file (diff suppressed because it is too large)
index/16.NN-index.js · 21363 lines · Normal file (diff suppressed because it is too large)
index/17.Interviews-index.js · 23676 lines · Normal file (diff suppressed because it is too large)
index/2.DeepEurope-index.js · 2894 lines · Normal file (diff suppressed because it is too large)
index/3.Network-index.js · 16602 lines · Normal file (diff suppressed because it is too large)
index/4.net.art-index.js · 9845 lines · Normal file (diff suppressed because it is too large)
index/5.Netzkritik-index.js · 4579 lines · Normal file (diff suppressed because it is too large)
index/6.FLOSS-index.js · 5835 lines · Normal file (diff suppressed because it is too large)
index/7.Critique_Art_Politics-index.js · 4849 lines · Normal file (diff suppressed because it is too large)
index/8.TacticalMedia-index.js · 11893 lines · Normal file (diff suppressed because it is too large)
index/9.List_talking_to_List-index.js · 14346 lines · Normal file (diff suppressed because it is too large)
keywords/FoxStoplist.txt · 426 lines · Normal file
@@ -0,0 +1,426 @@
#From "A stop list for general text" Fox 1989
a
about
above
across
after
again
against
all
almost
alone
along
already
also
although
always
among
an
and
another
any
anybody
anyone
anything
anywhere
are
area
areas
around
as
ask
asked
asking
asks
at
away
b
back
backed
backing
backs
be
because
became
become
becomes
been
before
began
behind
being
beings
best
better
between
big
both
but
by
c
came
can
cannot
case
cases
certain
certainly
clear
clearly
come
could
d
did
differ
different
differently
do
does
done
down
downed
downing
downs
during
e
each
early
either
end
ended
ending
ends
enough
even
evenly
ever
every
everybody
everyone
everything
everywhere
f
face
faces
fact
facts
far
felt
few
find
finds
first
for
four
from
full
fully
further
furthered
furthering
furthers
g
gave
general
generally
get
gets
give
given
gives
go
going
good
goods
got
great
greater
greatest
group
grouped
grouping
groups
h
had
has
have
having
he
her
herself
here
high
higher
highest
him
himself
his
how
however
i
if
important
in
interest
interested
interesting
interests
into
is
it
its
itself
j
just
k
keep
keeps
kind
knew
know
known
knows
l
large
largely
last
later
latest
least
less
let
lets
like
likely
long
longer
longest
m
made
make
making
man
many
may
me
member
members
men
might
more
most
mostly
mr
mrs
much
must
my
myself
n
necessary
need
needed
needing
needs
never
new
newer
newest
next
no
non
not
nobody
noone
nothing
now
nowhere
number
numbered
numbering
numbers
o
of
off
often
old
older
oldest
on
once
one
only
open
opened
opening
opens
or
order
ordered
ordering
orders
other
others
our
out
over
p
part
parted
parting
parts
per
perhaps
place
places
point
pointed
pointing
points
possible
present
presented
presenting
presents
problem
problems
put
puts
q
quite
r
rather
really
right
room
rooms
s
said
same
saw
say
says
second
seconds
see
seem
seemed
seeming
seems
sees
several
shall
she
should
show
showed
showing
shows
side
sides
since
small
smaller
smallest
so
some
somebody
someone
something
somewhere
state
states
still
such
sure
t
take
taken
than
that
the
their
them
then
there
therefore
these
they
thing
things
think
thinks
this
those
though
thought
thoughts
three
through
thus
to
today
together
too
took
toward
turn
turned
turning
turns
two
u
under
until
up
upon
us
use
uses
used
v
very
w
want
wanted
wanting
wants
was
way
ways
we
well
wells
went
were
what
when
where
whether
which
while
who
whole
whose
why
will
with
within
without
work
worked
working
works
would
x
y
year
years
yet
you
young
younger
youngest
your
yours
z
keywords/oldrake.py · 168 lines · Normal file
@@ -0,0 +1,168 @@
# Implementation of RAKE - Rapid Automatic Keyword Extraction algorithm
# as described in:
# Rose, S., D. Engel, N. Cramer, and W. Cowley (2010).
# Automatic keyword extraction from individual documents.
# In M. W. Berry and J. Kogan (Eds.), Text Mining: Applications and Theory.unknown: John Wiley and Sons, Ltd.

import re
import operator

debug = False
test = True


def is_number(s):
    try:
        float(s) if '.' in s else int(s)
        return True
    except ValueError:
        return False


def load_stop_words(stop_word_file):
    """
    Utility function to load stop words from a file and return as a list of words
    @param stop_word_file Path and file name of a file containing stop words.
    @return list A list of stop words.
    """
    stop_words = []
    for line in open(stop_word_file):
        if line.strip()[0:1] != "#":
            for word in line.split():  # in case more than one per line
                stop_words.append(word)
    return stop_words


def separate_words(text, min_word_return_size):
    """
    Utility function to return a list of all words that have a length greater than a specified number of characters.
    @param text The text that must be split in to words.
    @param min_word_return_size The minimum no of characters a word must have to be included.
    """
    splitter = re.compile('[^a-zA-Z0-9_\\+\\-/]')
    words = []
    for single_word in splitter.split(text):
        current_word = single_word.strip().lower()
        # leave numbers in phrase, but don't count as words, since they tend to invalidate scores of their phrases
        if len(current_word) > min_word_return_size and current_word != '' and not is_number(current_word):
            words.append(current_word)
    return words


def split_sentences(text):
    """
    Utility function to return a list of sentences.
    @param text The text that must be split in to sentences.
    """
    sentence_delimiters = re.compile(u'[.!?,;:\t\\\\"\\(\\)\\\'\u2019\u2013]|\\s\\-\\s')
    sentences = sentence_delimiters.split(text)
    return sentences


def build_stop_word_regex(stop_word_file_path):
    stop_word_list = load_stop_words(stop_word_file_path)
    stop_word_regex_list = []
    for word in stop_word_list:
        word_regex = r'\b' + word + r'(?![\w-])'  # added look ahead for hyphen
        stop_word_regex_list.append(word_regex)
    stop_word_pattern = re.compile('|'.join(stop_word_regex_list), re.IGNORECASE)
    return stop_word_pattern


def generate_candidate_keywords(sentence_list, stopword_pattern):
    phrase_list = []
    for s in sentence_list:
        tmp = re.sub(stopword_pattern, '|', s.strip())
        phrases = tmp.split("|")
        for phrase in phrases:
            phrase = phrase.strip().lower()
            if phrase != "":
                phrase_list.append(phrase)
    return phrase_list


def calculate_word_scores(phraseList):
    word_frequency = {}
    word_degree = {}
    for phrase in phraseList:
        word_list = separate_words(phrase, 0)
        word_list_length = len(word_list)
        word_list_degree = word_list_length - 1
        #if word_list_degree > 3: word_list_degree = 3 #exp.
        for word in word_list:
            word_frequency.setdefault(word, 0)
            word_frequency[word] += 1
            word_degree.setdefault(word, 0)
            word_degree[word] += word_list_degree  #orig.
            #word_degree[word] += 1/(word_list_length*1.0) #exp.
    for item in word_frequency:
        word_degree[item] = word_degree[item] + word_frequency[item]

    # Calculate Word scores = deg(w)/freq(w)
    word_score = {}
    for item in word_frequency:
        word_score.setdefault(item, 0)
        word_score[item] = word_degree[item] / (word_frequency[item] * 1.0)  #orig.
        #word_score[item] = word_frequency[item]/(word_degree[item] * 1.0) #exp.
    return word_score


def generate_candidate_keyword_scores(phrase_list, word_score):
    keyword_candidates = {}
    for phrase in phrase_list:
        keyword_candidates.setdefault(phrase, 0)
        word_list = separate_words(phrase, 0)
        candidate_score = 0
        for word in word_list:
            candidate_score += word_score[word]
        keyword_candidates[phrase] = candidate_score
    return keyword_candidates


class Rake(object):
    def __init__(self, stop_words_path):
        self.stop_words_path = stop_words_path
        self.__stop_words_pattern = build_stop_word_regex(stop_words_path)

    def run(self, text):
        sentence_list = split_sentences(text)
        phrase_list = generate_candidate_keywords(sentence_list, self.__stop_words_pattern)
        word_scores = calculate_word_scores(phrase_list)
        keyword_candidates = generate_candidate_keyword_scores(phrase_list, word_scores)
        sorted_keywords = sorted(keyword_candidates.items(), key=lambda x: x[1], reverse=True)
        return sorted_keywords


# if test:
# text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types."

# # Split text into sentences
# sentenceList = split_sentences(text)
# #stoppath = "FoxStoplist.txt" #Fox stoplist contains "numbers", so it will not find "natural numbers" like in Table 1.1
# stoppath = "SmartStoplist.txt" #SMART stoplist misses some of the lower-scoring keywords in Figure 1.5, which means that the top 1/3 cuts off one of the 4.0 score words in Table 1.1
# stopwordpattern = build_stop_word_regex(stoppath)

# # generate candidate keywords
# phraseList = generate_candidate_keywords(sentenceList, stopwordpattern)

# # calculate individual word scores
# wordscores = calculate_word_scores(phraseList)

# # generate candidate keyword scores
# keywordcandidates = generate_candidate_keyword_scores(phraseList, wordscores)
# if debug: print keywordcandidates

# sortedKeywords = sorted(keywordcandidates.iteritems(), key=operator.itemgetter(1), reverse=True)
# if debug: print sortedKeywords

# totalKeywords = len(sortedKeywords)
# if debug: print totalKeywords
# print sortedKeywords[0:(totalKeywords / 3)]

# rake = Rake("SmartStoplist.txt")
# keywords = rake.run(text)
# print keywords
keywords/rake.py · 301 lines · Normal file
@@ -0,0 +1,301 @@
# Implementation of RAKE - Rapid Automatic Keyword Extraction algorithm
# as described in:
# Rose, S., D. Engel, N. Cramer, and W. Cowley (2010).
# Automatic keyword extraction from individual documents.
# In M. W. Berry and J. Kogan (Eds.), Text Mining: Applications and Theory.unknown: John Wiley and Sons, Ltd.
#
# NOTE: The original code (from https://github.com/aneesha/RAKE)
# has been extended by a_medelyan (zelandiya)
# with a set of heuristics to decide whether a phrase is an acceptable candidate,
# as well as the ability to set frequency and phrase length parameters,
# important when dealing with longer documents
#
# NOTE 2: The code published by a_medelyan (https://github.com/zelandiya/RAKE-tutorial)
# has been additionally extended by Marco Pegoraro to implement the adjoined candidate
# feature described in section 1.2.3 of the original paper. Note that this creates the
# need to modify the metric for the candidate score, because the adjoined candidates
# have a very high score (because of the nature of the original score metric)

from __future__ import absolute_import
from __future__ import print_function
import re
import operator
# import six
# from six.moves import range
from collections import Counter

debug = False
test = False


def is_number(s):
    try:
        float(s) if '.' in s else int(s)
        return True
    except ValueError:
        return False


def load_stop_words(stop_word_file):
    """
    Utility function to load stop words from a file and return as a list of words
    @param stop_word_file Path and file name of a file containing stop words.
    @return list A list of stop words.
    """
    stop_words = []
    for line in open(stop_word_file):
        if line.strip()[0:1] != "#":
            for word in line.split():  # in case more than one per line
                stop_words.append(word)
    return stop_words


def separate_words(text, min_word_return_size):
    """
    Utility function to return a list of all words that have a length greater than a specified number of characters.
    @param text The text that must be split in to words.
    @param min_word_return_size The minimum no of characters a word must have to be included.
    """
    splitter = re.compile('[^a-zA-Z0-9_\\+\\-/]')
    words = []
    for single_word in splitter.split(text):
        current_word = single_word.strip().lower()
        # leave numbers in phrase, but don't count as words, since they tend to invalidate scores of their phrases
        if len(current_word) > min_word_return_size and current_word != '' and not is_number(current_word):
            words.append(current_word)
    return words


def split_sentences(text):
    """
    Utility function to return a list of sentences.
    @param text The text that must be split in to sentences.
    """
    sentence_delimiters = re.compile(u'[\\[\\]\n.!?,;:\t\\-\\"\\(\\)\\\'\u2019\u2013]')
    sentences = sentence_delimiters.split(text)
    return sentences


def build_stop_word_regex(stop_word_list):
    stop_word_regex_list = []
    for word in stop_word_list:
        word_regex = '\\b' + word + '\\b'
        stop_word_regex_list.append(word_regex)
    stop_word_pattern = re.compile('|'.join(stop_word_regex_list), re.IGNORECASE)
    return stop_word_pattern


#
# Function that extracts the adjoined candidates from a list of sentences and filters them by frequency
#
def extract_adjoined_candidates(sentence_list, stoplist, min_keywords, max_keywords, min_freq):
    adjoined_candidates = []
    for s in sentence_list:
        # Extracts the candidates from each single sentence and adds them to the list
        adjoined_candidates += adjoined_candidates_from_sentence(s, stoplist, min_keywords, max_keywords)
    # Filters the candidates and returns them
    return filter_adjoined_candidates(adjoined_candidates, min_freq)


# return adjoined_candidates

#
# Function that extracts the adjoined candidates from a single sentence
#
def adjoined_candidates_from_sentence(s, stoplist, min_keywords, max_keywords):
    # Initializes the candidate list to empty
    candidates = []
    # Splits the sentence to get a list of lowercase words
    sl = s.lower().split()
    # For each possible length of the adjoined candidate
    for num_keywords in range(min_keywords, max_keywords + 1):
        # Until the third-last word
        for i in range(0, len(sl) - num_keywords):
            # Position i marks the first word of the candidate. Proceeds only if it's not a stopword
            if sl[i] not in stoplist:
                candidate = sl[i]
                # Initializes j (the pointer to the next word) to 1
                j = 1
                # Initializes the word counter. This counts the non-stopwords words in the candidate
                keyword_counter = 1
                contains_stopword = False
                # Until the word count reaches the maximum number of keywords or the end is reached
                while keyword_counter < num_keywords and i + j < len(sl):
                    # Adds the next word to the candidate
                    candidate = candidate + ' ' + sl[i + j]
                    # If it's not a stopword, increase the word counter. If it is, turn on the flag
                    if sl[i + j] not in stoplist:
                        keyword_counter += 1
                    else:
                        contains_stopword = True
                    # Next position
                    j += 1
                # Adds the candidate to the list only if:
                # 1) it contains at least a stopword (if it doesn't it's already been considered)
                # AND
                # 2) the last word is not a stopword
                # AND
                # 3) the adjoined candidate keyphrase contains exactly the correct number of keywords (to avoid doubles)
                if contains_stopword and candidate.split()[-1] not in stoplist and keyword_counter == num_keywords:
                    candidates.append(candidate)
    return candidates
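# Worked example (editor's illustration, not part of the commit): with
# min_keywords=2 and the Fox stoplist, the sentence "the axis of evil returns"
# yields the adjoined candidate "axis of evil". "of" is a stopword, so the
# plain stopword-splitting pass would break the phrase apart, but here it
# survives as one candidate, provided it recurs at least min_freq times.
# Note the feature only activates when max_words_length_adj is raised above
# its default of 1, since a one-keyword candidate can never contain a stopword.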
#
# Function that filters the adjoined candidates to keep only those that appear with a certain frequency
#
def filter_adjoined_candidates(candidates, min_freq):
    # Creates a dictionary where the key is the candidate and the value is the frequency of the candidate
    candidates_freq = Counter(candidates)
    filtered_candidates = []
    # Uses the dictionary to filter the candidates
    for candidate in candidates:
        freq = candidates_freq[candidate]
        if freq >= min_freq:
            filtered_candidates.append(candidate)
    return filtered_candidates


def generate_candidate_keywords(sentence_list, stopword_pattern, stop_word_list, min_char_length=1, max_words_length=5,
                                min_words_length_adj=1, max_words_length_adj=1, min_phrase_freq_adj=2):
    phrase_list = []
    for s in sentence_list:
        tmp = re.sub(stopword_pattern, '|', s.strip())
        phrases = tmp.split("|")
        for phrase in phrases:
            phrase = phrase.strip().lower()
            if phrase != "" and is_acceptable(phrase, min_char_length, max_words_length):
                phrase_list.append(phrase)
    phrase_list += extract_adjoined_candidates(sentence_list, stop_word_list, min_words_length_adj,
                                               max_words_length_adj, min_phrase_freq_adj)
    return phrase_list


def is_acceptable(phrase, min_char_length, max_words_length):
    # a phrase must have a min length in characters
    if len(phrase) < min_char_length:
        return 0

    # a phrase must have a max number of words
    words = phrase.split()
    if len(words) > max_words_length:
        return 0

    digits = 0
    alpha = 0
    for i in range(0, len(phrase)):
        if phrase[i].isdigit():
            digits += 1
        elif phrase[i].isalpha():
            alpha += 1

    # a phrase must have at least one alpha character
    if alpha == 0:
        return 0

    # a phrase must have more alpha than digits characters
    if digits > alpha:
        return 0
    return 1


def calculate_word_scores(phraseList):
    word_frequency = {}
    word_degree = {}
    for phrase in phraseList:
        word_list = separate_words(phrase, 0)
        word_list_length = len(word_list)
        word_list_degree = word_list_length - 1
        # if word_list_degree > 3: word_list_degree = 3 #exp.
        for word in word_list:
            word_frequency.setdefault(word, 0)
            word_frequency[word] += 1
            word_degree.setdefault(word, 0)
            word_degree[word] += word_list_degree  # orig.
            # word_degree[word] += 1/(word_list_length*1.0) #exp.
    for item in word_frequency:
        word_degree[item] = word_degree[item] + word_frequency[item]

    # Calculate Word scores = deg(w)/freq(w)
    word_score = {}
    for item in word_frequency:
        word_score.setdefault(item, 0)
        word_score[item] = word_degree[item] / (word_frequency[item] * 1.0)  # orig.
        # word_score[item] = word_frequency[item]/(word_degree[item] * 1.0) #exp.
    return word_score
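# Worked example (editor's illustration, not part of the commit): for a
# candidate phrase "deep europe" seen once, each word has freq = 1 and
# degree = 2 (co-occurrence degree 1 plus its own frequency), so each word
# scores 2.0 and the phrase totals 4.0; a single word seen three times on
# its own has degree = 3 and freq = 3, scoring 1.0. This is why index.py
# keeps only RAKE results scoring strictly above 4.0: a two-word phrase
# must recur (or co-occur more widely) to pass the cutoff.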
def generate_candidate_keyword_scores(phrase_list, word_score, min_keyword_frequency=1):
    keyword_candidates = {}
    for phrase in phrase_list:
        if min_keyword_frequency > 1:
            if phrase_list.count(phrase) < min_keyword_frequency:
                continue
        keyword_candidates.setdefault(phrase, 0)
        word_list = separate_words(phrase, 0)
        candidate_score = 0
        for word in word_list:
            candidate_score += word_score[word]
        keyword_candidates[phrase] = candidate_score
    return keyword_candidates


class Rake(object):
    def __init__(self, stop_words_path, min_char_length=1, max_words_length=5, min_keyword_frequency=1,
                 min_words_length_adj=1, max_words_length_adj=1, min_phrase_freq_adj=2):
        self.__stop_words_path = stop_words_path
        self.__stop_words_list = load_stop_words(stop_words_path)
        self.__min_char_length = min_char_length
        self.__max_words_length = max_words_length
        self.__min_keyword_frequency = min_keyword_frequency
        self.__min_words_length_adj = min_words_length_adj
        self.__max_words_length_adj = max_words_length_adj
        self.__min_phrase_freq_adj = min_phrase_freq_adj

    def run(self, text):
        sentence_list = split_sentences(text)

        stop_words_pattern = build_stop_word_regex(self.__stop_words_list)

        phrase_list = generate_candidate_keywords(sentence_list, stop_words_pattern, self.__stop_words_list,
                                                  self.__min_char_length, self.__max_words_length,
                                                  self.__min_words_length_adj, self.__max_words_length_adj,
                                                  self.__min_phrase_freq_adj)

        word_scores = calculate_word_scores(phrase_list)

        keyword_candidates = generate_candidate_keyword_scores(phrase_list, word_scores, self.__min_keyword_frequency)

        sorted_keywords = sorted(keyword_candidates.items(), key=lambda x: x[1], reverse=True)
        return sorted_keywords
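# Usage sketch (editor's illustration; mirrors the call in index.py, the
# sample text is hypothetical):
#
#     r = Rake('keywords/FoxStoplist.txt', max_words_length=3, min_keyword_frequency=1)
#     for phrase, score in r.run("Tactical media emerged from the net.art scene."):
#         print(phrase, score)
#
# run() returns (phrase, score) tuples sorted by descending score.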
# if test and __name__ == '__main__':
# text = "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for constructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types."

# # Split text into sentences
# sentenceList = split_sentences(text)
# # stoppath = "FoxStoplist.txt" #Fox stoplist contains "numbers", so it will not find "natural numbers" like in Table 1.1
# stoppath = "data/stoplists/SmartStoplist.txt"  # SMART stoplist misses some of the lower-scoring keywords in Figure 1.5, which means that the top 1/3 cuts off one of the 4.0 score words in Table 1.1
# stopwordpattern = build_stop_word_regex(stoppath)

# # generate candidate keywords
# phraseList = generate_candidate_keywords(sentenceList, stopwordpattern, load_stop_words(stoppath))

# # calculate individual word scores
# wordscores = calculate_word_scores(phraseList)

# # generate candidate keyword scores
# keywordcandidates = generate_candidate_keyword_scores(phraseList, wordscores)
# if debug: print(keywordcandidates)

# sortedKeywords = sorted(six.iteritems(keywordcandidates), key=operator.itemgetter(1), reverse=True)
# if debug: print(sortedKeywords)

# totalKeywords = len(sortedKeywords)
# if debug: print(totalKeywords)
# print(sortedKeywords[0:(totalKeywords // 3)])

# rake = Rake("data/stoplists/SmartStoplist.txt")
# keywords = rake.run(text)
# print(keywords)
www/routes.py · 119 lines changed
@@ -3,6 +3,7 @@ from www import app
 import json, logging, os, glob
 from lxml import etree as et
 import config
+from collections import OrderedDict

 def list_all(d, ext):
@@ -12,6 +13,112 @@ def list_all(d, ext):
     return [os.path.basename(f) for f in glob.glob(os.path.join(d, "*." + ext))]

+
+@app.route('/')
+def top():
+    return render_template("index.html")
+
+
+'''
+INDEX
+'''
+
+def read_index(d, fn):
+    fp = os.path.join(d, fn)
+    if not os.path.isfile(fp):
+        return None
+
+    with open(fp) as f:
+        index_data = json.load(f, object_pairs_hook=OrderedDict)
+
+    return index_data
+
+
+# def add_selected_kw_index(d, fn, kw):
+#     fp = os.path.join(d, fn)
+#     if not os.path.isfile(fp):
+#         return False
+#
+#     with open(fp) as f:
+#         index_data = json.load(f)
+#
+#     if kw not in index_data['orphan']:
+#         return False
+#
+#     v = index_data['orphan'].pop(kw)
+#     if kw not in index_data['selected']:
+#         index_data['selected'][kw] = []
+#
+#     index_data['selected'][kw] += v
+#
+#     with open(fp, 'w') as fout:
+#         json.dump(index_data, fout, indent=4, sort_keys=True, ensure_ascii=False)
+#
+#     return True
+
+
+def modify_selected_kw_index(d, fn, kw, action="add"):
+    fp = os.path.join(d, fn)
+    if not os.path.isfile(fp):
+        return False
+
+    with open(fp) as f:
+        index_data = json.load(f)
+
+    if action == 'add':
+        in_dic = index_data['selected']
+        out_dic = index_data['orphan']
+    elif action == 'delete':
+        out_dic = index_data['selected']
+        in_dic = index_data['orphan']
+    else:
+        return False
+
+    if kw not in out_dic:
+        return False
+
+    v = out_dic.pop(kw)
+    if kw not in in_dic:
+        in_dic[kw] = []
+
+    in_dic[kw] += v
+
+    with open(fp, 'w') as fout:
+        json.dump(index_data, fout, indent=4, sort_keys=True, ensure_ascii=False)
+
+    return True
+
+
+@app.route('/index', methods = ['GET'])
+def index():
+    if request.method == 'GET':
+        li = list_all(config.index['path'], 'js')
+        li = sorted(li, key=lambda x: int(x.split('.')[0]))
+        return render_template("list_files_all.html", title="INDEX [all]", prefix="/index/", files=li)
+
+
+@app.route('/index/<path:fn>', methods = ['GET', 'POST'])
+def indexfn(fn):
+    if request.method == 'GET':
+        data = read_index(config.index['path'], fn)
+        if data is not None:
+            return render_template("indx.html", fn=fn, selected=data['selected'], orphan=data['orphan'])
+        else:
+            return "File: " + fn + " does not exist."
+    elif request.method == 'POST':
+        data = request.form
+        a = data.get('action')
+        if a == "add":
+            logging.info("POST ADD " + fn + " -- " + data.get('kw') + " ++ " + data.get('list'))
+            if modify_selected_kw_index(config.index['path'], fn, data.get('kw')):
+                return "ok"
+        elif a == "delete":
+            logging.info("POST DELETE " + fn + " -- " + data.get('kw') + " ++ " + data.get('list'))
+            if modify_selected_kw_index(config.index['path'], fn, data.get('kw'), action="delete"):
+                return "ok"
+        return "-"
+
+
+'''
+XML
+'''
+
 def read_xml(d, fn):
     fp = os.path.join(d, fn)
     if not os.path.isfile(fp):
@@ -97,17 +204,12 @@ def delete_nbr_xml(d, fn, nbr, date):
     tr.write(fp)
     return True

-
-@app.route('/')
-def index():
-    return render_template("index.html")
-
-@app.route('/xml', methods = ['GET', 'POST'])
+
+@app.route('/xml', methods = ['GET'])
 def xml():
     if request.method == 'GET':
         li = list_all(config.xml['path'], 'xml')
         li = sorted(li, key=lambda x: int(x.split('.')[0]))
-        return render_template("xml_all.html", files=li)
+        return render_template("list_files_all.html", title="XML [all]", prefix="/xml/", files=li)

 @app.route('/xml/<path:fn>', methods = ['GET', 'POST'])
 def xmlfn(fn):
@@ -129,6 +231,3 @@ def xmlfn(fn):
         if delete_nbr_xml(config.xml['path'], fn, data.get('nbr'), data.get('date')):
             return "ok"
         return "-"
-
-
-
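The POST half of indexfn() defines a small form-encoded protocol: action is "add" or "delete", kw is the keyword to move between the orphan and selected lists, and list names the list it currently sits in. A minimal stdlib client sketch (editor's illustration; the host, port, file name, and keyword are hypothetical):

import urllib.parse, urllib.request

form = urllib.parse.urlencode({'action': 'add', 'kw': 'tactical media', 'list': 'orphan'}).encode()
req = urllib.request.Request('http://localhost:5000/index/3.Network-index.js', data=form)
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())  # "ok" on success, "-" otherwise

This is exactly the request indx.js below issues via $.post when one of the +/- buttons is clicked.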
www/static/indx.js · 10 lines · Normal file
@@ -0,0 +1,10 @@
$(document).ready(function(){
    $('.add, .delete').click(function(e) {
        var li = $(this).parent("li");
        $.post('/index/' + li.data("file"), {'action': $(this).attr('class'), 'kw': li.data("kw"), 'list': li.data("list")}, function(d) {
            if(d === 'ok') {
                location.reload();
            }
        });
    });
});
www/templates/indx.html · 28 lines · Normal file
@@ -0,0 +1,28 @@
<html>
<head>
    <meta charset="utf-8">
    <title>{{fn}}</title>
    <script type="text/javascript" src="{{ url_for('static',filename='jquery-3.2.1.min.js') }}" charset="utf-8"></script>
    <script type="text/javascript" src="{{ url_for('static',filename='indx.js') }}"></script>
</head>
<body>
    <h1>{{fn}}</h1>
    <div id="all">
        <h2>Selected</h2>
        <ul>
        {% for kw, s in selected.items() %}
            <li data-file="{{fn}}" data-kw="{{kw}}" data-list="selected">{{kw}} {% for ss in s %} - {{ss}} {% endfor %}<button class="delete">-</button></li>
        {% endfor %}
        </ul>
        <hr>
        <hr>
        <hr>
        <h2>Orphans</h2>
        <ul>
        {% for kw, s in orphan.items() %}
            <li data-file="{{fn}}" data-kw="{{kw}}" data-list="orphan">{{kw}} {% for ss in s %} - {{ss}} {% endfor %}<button class="add">+</button></li>
        {% endfor %}
        </ul>
    </div>
</body>
</html>
www/templates/list_files_all.html · 14 lines changed
@@ -1,14 +1,14 @@
 <html>
 <head>
 <meta charset="utf-8">
-<title>XML [all]</title>
+<title>{{title}}</title>
 </head>
 <body>
-<h1>XML [all]</h1>
+<h1>{{title}}</h1>
 <div id="all">
 <ul>
 {% for f in files %}
-<li><a href="/xml/{{f}}">{{f}}</a></li>
+<li><a href="{{prefix}}{{f}}">{{f}}</a></li>
 {% endfor %}
 </ul>
 </div>