??? trying to merge after a year???
This commit is contained in:
gauthiier 2020-12-15 15:37:17 +01:00
commit 9c4cdb72ad
2 changed files with 20 additions and 9 deletions

View File

@ -263,7 +263,7 @@ class Archive:
return results return results
except mariadb.Error as error: except mariadb.Error as error:
logging.erro("Error: {}".format(error)) logging.error("Error: {}".format(error))
finally: finally:
cursor.close() cursor.close()

View File

@ -203,15 +203,18 @@ def collect_message(url, message):
# message['author_name'] = from_addr[0] # message['author_name'] = from_addr[0]
# message['from'] = from_addr[1] # message['from'] = from_addr[1]
## -- content -- # ## -- content --
# test # # test
c1 = soup.select('pre:nth-of-type(1)') # c1 = soup.select('pre:nth-of-type(1)')
if len(c1) > 0: # if len(c1) > 0:
message['content'] = c1[0].text # message['content'] = c1[0].text
else: # else:
message['content'] = soup.select('pre:nth-of-type(2)')[0].text # message['content'] = soup.select('pre:nth-of-type(2)')[0].text
# message['content'] = soup.select('pre:nth-of-type(2)')[0].text # # message['content'] = soup.select('pre:nth-of-type(2)')[0].text
# new way!
message['content'] = parse_xmessage(str(soup))
# mhonarc xcomments # mhonarc xcomments
# ref: http://www.schlaubert.de/MHonArc/doc/resources/printxcomments.html # ref: http://www.schlaubert.de/MHonArc/doc/resources/printxcomments.html
@ -223,3 +226,11 @@ def parse_xcomment(soup, xcom):
def test_xcomment(soup): def test_xcomment(soup):
return soup.find(text=re.compile('X-Message-Id')) is not None return soup.find(text=re.compile('X-Message-Id')) is not None
# (edit 21.12.2019): this is the new way as of 2018 -- since there is no more moderation on Nettime...
def parse_xmessage(html):
    """Extract the plain-text message body from an archived message page.

    The body is expected to sit between the HTML comment markers
    ``<!--X-Body-of-Message-->`` and ``<!--X-Body-of-Message-End-->``.
    The span from the opening marker through the closing marker is parsed
    with BeautifulSoup (``html5lib`` parser) and reduced to its text.

    Args:
        html: The HTML of the message page, as a string.

    Returns:
        The text of the message body, or an empty string when the body
        markers are not found in ``html``.
    """
    body_pattern = r'<!--X-Body-of-Message-->.*?<!--X-Body-of-Message-End-->'
    # DOTALL lets ``.`` match newlines so a multi-line body is captured;
    # the non-greedy ``.*?`` stops at the first closing marker.
    match = re.search(body_pattern, html, re.DOTALL)
    if match is None:
        # Without this guard a page lacking the markers crashed with
        # AttributeError on ``None.group()``; report "no body" instead.
        return ""
    body_soup = BeautifulSoup(match.group(), "html5lib")
    return body_soup.get_text()