Sie könnten dieses Skript ausprobieren. Ich musste einige Mbox-Dateien, die ich aus Mailman-Mailinglisten-Archiven heruntergeladen hatte, nachbearbeiten, um sie in ein Format zu bringen, das Mutt erkennt. Vermutlich ist das Datumsformat dabei der kritischste Punkt. Auf eine einfachere Lösung bin ich bisher nicht gestoßen, aber bei mir funktioniert es so.
#!/usr/bin/env python
"""
Usage: ./mailman2mbox.py infile outfile default-to-address
"""
import sys
from time import strftime,strptime,mktime,asctime
from email.utils import parseaddr,formatdate
# Command line: infile outfile [list-id].  Anything else is a usage error.
if len(sys.argv) not in (3, 4):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(__doc__)
    # Exit non-zero so calling scripts can tell that nothing was converted.
    sys.exit(1)

# Output mbox; written to by finish() and closed at the end of the script.
out = open(sys.argv[2], "w")

# Optional third argument: value used for a "List-Id:" header that finish()
# adds to every message that does not already carry one.
listid = sys.argv[3] if len(sys.argv) == 4 else None

# strptime() formats tried, in order, against the Date: header once any
# trailing "(comment)", numeric UTC offset and weekday prefix have been
# handled separately by finish().
date_patterns = (
    "%b %d %H:%M:%S %Y",
    "%d %b %H:%M:%S %Y",
    "%d %b %Y %H:%M:%S",
    "%d %b %H:%M:%S",
    "%d %b %y %H:%M:%S",
    "%d %b %Y %H.%M.%S",
    '%m/%d/%y %H:%M:%S %p',
)
class HeaderError(TypeError):
    """Raised by finish() when a message has no 'Date:' header."""
def finish(headers, body):
    """Normalise one collected message and write it to the output mbox.

    The message's ``Date:`` header is re-parsed and rewritten with
    ``email.utils.formatdate``, and the trailing timestamp of the ``From ``
    separator line (``headers[0]``) is replaced by the ``asctime`` form of
    the same date.  If the module-level ``listid`` is set and no ``List-Id:``
    header exists, one is appended.  The result is written to the
    module-level ``out`` file.

    Args:
        headers: list of header lines (each ending in a newline); index 0 is
            the ``From `` separator line.  Modified in place.
        body: list of body lines.  A blank line is appended in place before
            anything else happens, so it is present even when this raises.

    Raises:
        HeaderError: no line in ``headers`` starts with ``Date:``.
        ValueError: the date matches none of ``date_patterns`` or cannot be
            converted to a timestamp; the argument is the original header.
    """
    # Appended first on purpose: callers write ``body`` out themselves when
    # HeaderError is raised and expect the separating blank line to be there.
    body.append("\n")

    for n, ln in enumerate(headers):
        if ln.startswith("Date:"):
            break
    else:
        raise HeaderError("No 'Date:' header:\n" + "".join(headers) + "\n")
    original_date_header = headers[n]

    if listid is not None:
        has_list_id = any(h.lower().startswith("list-id:") for h in headers)
        if not has_list_id:
            headers.append("List-Id: <%s>\n" % (listid,))

    date_line = ln[5:].strip()

    # Drop a trailing "(comment)", e.g. "(EST)".
    if date_line.endswith(')'):
        date_line = date_line[:date_line.rfind('(')].rstrip()

    # Drop a trailing numeric UTC offset such as "+0200".  The offset is not
    # used afterwards: the parsed wall-clock time is fed to mktime(), i.e.
    # treated as local time.  The length/digit checks keep short or odd
    # values from raising here; they fall through to the pattern loop.
    if (len(date_line) >= 5 and date_line[-5] in "+-"
            and date_line[-4:].isdigit()):
        date_line = date_line[:-5].rstrip()

    # Work out how the optional weekday name is separated from the rest.
    if date_line[:3] in ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"):
        if date_line[3:5] == ', ':
            prefix = "%a, "
        elif date_line[3:4] == ',':  # slice, so a bare "Mon" cannot IndexError
            prefix = "%a,"
        else:
            prefix = "%a "
    else:
        prefix = ""

    # Try every known pattern; on failure chop the last space-separated token
    # (e.g. a time-zone name) and retry.  rfind() returns -1 when no space is
    # left, which removes one character per round, so the loop terminates.
    date_struct = None
    while date_struct is None:
        for p in date_patterns:
            try:
                date_struct = strptime(date_line, prefix + p)
            except ValueError:
                continue
            break
        else:
            if not date_line:
                raise ValueError(original_date_header)
            date_line = date_line[:date_line.rfind(' ')]

    # Pass the struct_time straight through: Python 3's mktime()/asctime()
    # reject a list.  Both values are computed before touching ``headers`` so
    # a failure reports the original header.
    try:
        new_date = formatdate(mktime(date_struct), True)
        new_stamp = asctime(date_struct)
    except (ValueError, OverflowError):
        raise ValueError(original_date_header)

    headers[n] = 'Date: %s\n' % (new_date,)
    # The separator line ends in a 24-character asctime-style date plus the
    # newline (25 characters); replace exactly that tail.
    headers[0] = "%s %s\n" % (headers[0][:-25].rstrip(), new_stamp)

    out.writelines(headers)
    out.writelines(body)
def _write_message(headers, body):
    """Hand one collected message to finish(); return True if it was accepted.

    When finish() raises HeaderError (no ``Date:`` header) the "From " line
    that started this chunk was not a real message separator.  The chunk is
    then written unchanged with a leading '>' (giving ">From ...") so it
    stays part of the preceding message.
    """
    try:
        finish(headers, body)
    except HeaderError:
        out.write('>')
        out.writelines(headers)
        out.writelines(body)
        return False
    return True


message = 0                  # number of messages accepted by finish()
headers, body = None, None   # None until the first "From " line is seen
header = False               # True while reading the current header section
try:
    with open(sys.argv[1]) as infile:
        for line in infile:
            if line.startswith("From "):
                # A new separator ends the message collected so far.
                if headers is not None and _write_message(headers, body):
                    message += 1
                headers, body = [], []
                header = True
                # Mailman obfuscates addresses as "user at host".
                line = line.replace(" at ", "@")
            elif headers is None:
                # Text before the first "From " separator belongs to no
                # message; skip it instead of crashing.
                continue
            elif line == '\n':
                header = False
            elif header and line.startswith('From:'):
                line = line.replace(" at ", "@")
            (headers if header else body).append(line)

    # Flush the last message; an empty input file has none.
    if headers is not None and _write_message(headers, body):
        message += 1
finally:
    out.close()