listservs/lists/util.py
2019-12-22 08:22:20 +01:00

14 lines
403 B
Python

import urllib.request, urllib.parse
import logging, os, sys, traceback, re, time, json, gzip, difflib
from bs4 import BeautifulSoup
def request(url):
    """Fetch *url* and return its body parsed as a BeautifulSoup tree.

    The response body is decoded as UTF-8 when possible. If it is not
    valid UTF-8, a warning is logged and the raw bytes are handed to
    BeautifulSoup unchanged (presumably relying on the parser's own
    encoding handling -- confirm against the bs4 documentation).

    Args:
        url: Anything accepted by ``urllib.request.urlopen`` (a URL string
            or a ``urllib.request.Request`` object).

    Returns:
        A ``BeautifulSoup`` object built with the ``html5lib`` parser.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``response.read`` raises;
        those errors are not caught here and propagate to the caller.
    """
    # The context manager guarantees the connection is released even when
    # reading the body fails part-way through.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    try:
        html = html.decode(encoding="utf-8")
    except UnicodeDecodeError:
        # Best effort: keep the undecoded bytes and let the parser cope.
        # Lazy %-formatting also works when `url` is a Request object,
        # where string concatenation would raise TypeError.
        logging.warning(
            "Error decoding(utf-8): %s... Continuing (non-utf).", url
        )

    return BeautifulSoup(html, "html5lib")