93 lines
2.1 KiB
Python
Raw Normal View History

2017-11-04 13:34:05 +01:00
import email
import email.utils
import hashlib
def format_content(msg, archive_name):
    """Return the 'content' entry of *msg*.

    *archive_name* is accepted for signature parity with the other
    formatters and is not used here.
    """
    content = msg['content']
    return content
def format_url(msg, archive_name):
    """Return the 'url' entry of *msg*.

    *archive_name* is accepted for signature parity with the other
    formatters and is not used here.
    """
    url = msg['url']
    return url
def format_author(msg, archive_name):
    """Return the 'author_name' entry of *msg*.

    *archive_name* is accepted for signature parity with the other
    formatters and is not used here.
    """
    author = msg['author_name']
    return author
def format_from_token(from_str, sep):
    """Extract an obfuscated, lower-cased address from a 'From' string.

    The separator *sep* found between the local part and the domain
    (for example '@', 'at' or '{AT}') is replaced by the marker '{AT}'
    and the whole result is lower-cased, so the marker itself ends up
    as '{at}' in the returned value.

    Args:
        from_str: raw 'From' header text.
        sep: separator expected between local part and domain.

    Returns:
        The formatted address, or None when *sep* cannot be located
        with a token on each side of it (diagnostics are printed).
    """
    parsed = email.utils.parseaddr(from_str)
    from_addr = parsed[1]

    # A symbolic separator ('@', '{AT}') can safely be matched as a
    # substring of the parsed address.  A purely alphabetic one ('at')
    # cannot: it also matches ordinary letters ("nathan"), which would
    # corrupt the address, so those always go through the token path.
    if sep in from_addr and not sep.isalpha():
        return from_addr.replace(sep, '{AT}').lower()

    tok = from_str.split()
    at = tok.index(sep) if sep in tok else -1

    # The separator must be a standalone token with a neighbour on both
    # sides; otherwise tok[at - 1] would wrap around to the last token
    # or tok[at + 1] would run past the end of the list.
    if at < 1 or at + 1 >= len(tok):
        print(tok)
        print("error formating 'from' " + from_str + " -- expecting sep: " + sep)
        print("*** " + from_str)
        print("+++")
        print(parsed)
        print("----")
        return None

    from_addr = ''.join([tok[at - 1], '{AT}', tok[at + 1]])
    if from_addr.startswith('<') or from_addr.endswith('>'):
        from_addr = from_addr.strip('<').strip('>')
    return from_addr.lower()
def format_from(msg, archive_name):
    """Format the 'from' entry of *msg* as an obfuscated address.

    The sender string is probed for " {AT} ", then " at ", then "@";
    the first marker found selects the separator handed to
    format_from_token.  When none is present the sender string is
    returned unchanged.  *archive_name* is not used.
    """
    sender = msg['from']
    # Probe order is significant: the first matching marker wins.
    probes = (
        (" {AT} ", '{AT}'),
        (" at ", 'at'),
        ("@", '@'),
    )
    for marker, sep in probes:
        if marker in sender:
            return format_from_token(sender, sep)
    return sender
# returns utc timestamp
def format_date(msg, archive_name):
    """Convert the 'date' entry of *msg* to a UTC timestamp.

    The date string is parsed with email.utils.parsedate_tz and turned
    into seconds since the epoch with email.utils.mktime_tz.
    *archive_name* is not used.

    Returns:
        The timestamp, or None when the date cannot be parsed or
        converted (a short diagnostic is printed in that case).
    """
    date_str = msg['date']
    try:
        # parsedate_tz returns None for unparseable input, which makes
        # mktime_tz raise TypeError; that is handled below.
        date_tz = email.utils.parsedate_tz(date_str)
        return email.utils.mktime_tz(date_tz)
    except (TypeError, ValueError, OverflowError, AttributeError) as err:
        # Only the failures expected from bad date data are handled.
        # The previous `return` inside `finally` silently discarded
        # every exception, including KeyboardInterrupt.
        print("Format Date " + type(err).__name__)
        # str() keeps the diagnostic itself from failing on a None date.
        print(" > " + str(date_str))
        return None
def format_subject(msg, archive_name):
    """Return the 'subject' entry of *msg*.

    *archive_name* is accepted for signature parity with the other
    formatters and is not used here.
    """
    subject = msg['subject']
    return subject
def format_id(msg, archive_name):
    """Return an identifier for *msg*.

    The 'message-id' entry is used when present.  Otherwise the id is
    the SHA-1 hex digest of the 'author_name' and 'date' entries
    concatenated and encoded as UTF-8.  *archive_name* is not used.
    """
    if "message-id" in msg:
        return msg['message-id']

    # No message id available: derive one from author name + date.
    seed = msg['author_name'] + msg['date']
    digest = hashlib.sha1(seed.encode('utf-8'))
    return digest.hexdigest()
# format='%d/%m/%Y'
def min_date(archive_name):
    """Return the minimum date string configured for an archive.

    Any name containing "nettime" maps to '01/10/1995'; "spectre",
    "empyre" and "crumb" are matched exactly.  Dates are written as
    day/month/year.  Unknown archives yield None.
    """
    if "nettime" in archive_name:
        return '01/10/1995'

    exact_matches = (
        ("spectre", '01/08/2001'),
        ("empyre", '01/01/2002'),
        ("crumb", '01/02/2001'),
    )
    for name, first_day in exact_matches:
        if archive_name == name:
            return first_day
    return None