14 lines
403 B
Python
14 lines
403 B
Python
|
|
import urllib.request, urllib.parse
|
||
|
|
import logging, os, sys, traceback, re, time, json, gzip, difflib
|
||
|
|
from bs4 import BeautifulSoup
|
||
|
|
|
||
|
|
def request(url):
    """Fetch *url* and return its body parsed as a BeautifulSoup document.

    The body is decoded as UTF-8 when possible. If it is not valid UTF-8,
    a warning is logged and the raw bytes are handed to BeautifulSoup
    unchanged, leaving the parser to deal with the encoding.

    Args:
        url: Anything accepted by ``urllib.request.urlopen`` (a URL string
            or a ``urllib.request.Request``).

    Returns:
        A ``BeautifulSoup`` object built with the ``"html5lib"`` parser.

    Raises:
        Errors raised by ``urllib.request.urlopen`` or ``read`` are not
        caught here and propagate to the caller.
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    try:
        html = html.decode(encoding="utf-8")
    except UnicodeDecodeError:
        # Best effort: keep the undecoded bytes and let the parser cope.
        # Lazy %-formatting also tolerates a non-str url (e.g. a Request).
        logging.warning(
            "Error decoding(utf-8): %s... Continuing (non-utf).", url
        )

    return BeautifulSoup(html, "html5lib")
|