import difflib
import gzip
import json
import logging
import os
import re
import sys
import time
import traceback
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup


def request(url):
    """Fetch *url* and return its body parsed as a BeautifulSoup document.

    The response body is read in full and decoded as UTF-8. If the bytes
    are not valid UTF-8, a warning is logged and the raw bytes are handed
    to BeautifulSoup instead.

    Args:
        url: Anything accepted by ``urllib.request.urlopen``.

    Returns:
        A ``BeautifulSoup`` object built with the ``html5lib`` parser.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``read()`` raise; network
        errors are not handled here and propagate to the caller.
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    try:
        html = html.decode(encoding="utf-8")
    except UnicodeDecodeError:
        # Best effort: keep the undecoded bytes and carry on with parsing.
        # %s formatting (instead of "+") also works when url is not a str.
        logging.warning(
            "Error decoding(utf-8): %s... Continuing (non-utf).", url
        )

    return BeautifulSoup(html, "html5lib")