pppadump/linkify.py

41 lines
844 B
Python
Raw Normal View History

2015-07-23 18:09:20 +02:00
from __future__ import print_function
import re, sys
2015-07-30 13:33:39 +02:00
def strip_tags(text):
    """Return *text* with every ``<...>`` span removed.

    The match is non-greedy, so each ``<`` is paired with the nearest
    following ``>``.  ``.`` does not match a newline here, so only spans
    that open and close on the same line are removed.
    """
    tag_pattern = re.compile(r"<.*?>")
    return tag_pattern.sub("", text)
2015-08-25 11:54:37 +02:00
def urlify(t, ext=".html"):
    """Build a link target from the name *t*.

    Every space in *t* becomes an underscore and *ext* is appended.
    """
    underscored = "_".join(t.split(" "))
    return underscored + ext
2015-07-23 18:09:20 +02:00
2015-07-30 13:33:39 +02:00
def filename_to_padid(t, ext=".html"):
    """Reverse ``urlify``: turn a file name back into a name with spaces.

    Every underscore in *t* becomes a space and a trailing *ext* is
    removed.  *ext* defaults to ".html", mirroring the default of
    ``urlify``, so existing one-argument calls behave as before.

    The mapping is lossy: an underscore that was part of the original
    name also comes back as a space.
    """
    t = t.replace("_", " ")
    # re.escape keeps the dot (and any other special character in ext)
    # literal; "$" anchors the match at the end of the name.
    return re.sub(re.escape(ext) + r"$", "", t)
2015-07-23 18:09:20 +02:00
def linkify(src, urlify=urlify):
    """Replace ``[[Name]]`` wikilinks in *src* with HTML anchors.

    For every ``[[...]]`` span the text between the brackets is stripped
    of tags with ``strip_tags`` and passed to *urlify* to produce the
    ``href``.  The span is rewritten as
    ``[[<a class="wikilink" href="...">Name</a>]]``.

    Parameters:
        src: the text to scan.
        urlify: callable mapping a link name to its href; defaults to
            the module-level ``urlify``.

    Returns:
        A ``(new_src, names)`` tuple, where *names* lists the stripped
        link names in the order they were found (duplicates included).
    """
    collect = []

    def s(m):
        # Only group 1 is used; the optional "?..." group is discarded.
        contents = strip_tags(m.group(1))
        collect.append(contents)
        link = urlify(contents)
        return "[[<a class=\"wikilink\" href=\"{0}\">{1}</a>]]".format(link, contents)

    # Question marks are ignored by etherpad, so a trailing "?..." part
    # is matched separately and dropped.  That part must be non-greedy:
    # a greedy ".*" runs on to the LAST "]]" of the line and swallows
    # every later link on the same line.
    src = re.sub(r"\[\[(.+?)(\?.*?)?\]\]", s, src)
    return (src, collect)
if __name__ == "__main__":
    # Filter mode: read text from stdin, print each collected link name
    # on its own line, then print the linkified text.
    linked_text, link_names = linkify(sys.stdin.read())
    for link_name in link_names:
        print(link_name)
    print(linked_text)