diff --git a/archive/archive.py b/archive/archive.py
index 50d9139..301b95d 100644
--- a/archive/archive.py
+++ b/archive/archive.py
@@ -263,7 +263,7 @@ class Archive:
             return results
 
         except mariadb.Error as error:
-            logging.erro("Error: {}".format(error))
+            logging.error("Error: {}".format(error))
         finally:
             cursor.close()
 
diff --git a/lists/mhonarc.py b/lists/mhonarc.py
index 8a88036..a2e1824 100644
--- a/lists/mhonarc.py
+++ b/lists/mhonarc.py
@@ -203,15 +203,18 @@ def collect_message(url, message):
     # message['author_name'] = from_addr[0]
     # message['from'] = from_addr[1]
 
-    ## -- content --
-    # test
-    c1 = soup.select('pre:nth-of-type(1)')
-    if len(c1) > 0:
-        message['content'] = c1[0].text
-    else:
-        message['content'] = soup.select('pre:nth-of-type(2)')[0].text
+    # ## -- content --
+    # # test
+    # c1 = soup.select('pre:nth-of-type(1)')
+    # if len(c1) > 0:
+    #     message['content'] = c1[0].text
+    # else:
+    #     message['content'] = soup.select('pre:nth-of-type(2)')[0].text
 
-    # message['content'] = soup.select('pre:nth-of-type(2)')[0].text
+    # # message['content'] = soup.select('pre:nth-of-type(2)')[0].text
+
+    # new way!
+    message['content'] = parse_xmessage(str(soup))
 
 # mhonarc xcomments
 # ref: http://www.schlaubert.de/MHonArc/doc/resources/printxcomments.html
@@ -223,3 +226,26 @@ def parse_xcomment(soup, xcom):
 
 def test_xcomment(soup):
     return soup.find(text=re.compile('X-Message-Id')) is not None
+
+# (edit 21.12.2019): this is the new way as of 2018 -- when no more moderation on Nettime...
+def parse_xmessage(html):
+    """Return the text of the message body embedded in a MHonArc page.
+
+    The body is looked up between MHonArc's ``<!--X-Body-of-Message-->``
+    and ``<!--X-Body-of-Message-End-->`` comment markers.
+
+    NOTE(review): the pattern was previously ``.*?``, which only ever
+    matches the empty string, so an empty content was always returned.
+    Confirm the markers against the archived pages.
+
+    :param html: full HTML source of the message page.
+    :return: body text, or '' when the markers are not found.
+    """
+    rr = r'<!--X-Body-of-Message-->.*?<!--X-Body-of-Message-End-->'
+    s = re.search(rr, html, re.DOTALL)
+    if s is None:
+        # page without body markers: nothing to extract
+        return ''
+    se = BeautifulSoup(s.group(), "html5lib")
+    return se.get_text()
+