CBC Input: Handle comics.txt encoded in UTF-16 with a BOM

This commit is contained in:
Kovid Goyal 2010-05-08 00:24:37 -06:00
parent 6b79a732b1
commit e520f31ed6

View File

@@ -341,8 +341,15 @@ class ComicInput(InputFormatPlugin):
if not os.path.exists('comics.txt'):
raise ValueError('%s is not a valid comic collection'
%stream.name)
-raw = open('comics.txt', 'rb').read().decode('utf-8')
-raw.lstrip(unicode(codecs.BOM_UTF8, "utf8" ))
+raw = open('comics.txt', 'rb').read()
+if raw.startswith(codecs.BOM_UTF16_BE):
+raw = raw.decode('utf-16-be')[1:]
+elif raw.startswith(codecs.BOM_UTF16_LE):
+raw = raw.decode('utf-16-le')[1:]
+elif raw.startswith(codecs.BOM_UTF8):
+raw = raw.decode('utf-8')[1:]
+else:
+raw = raw.decode('utf-8')
for line in raw.splitlines():
line = line.strip()
if not line: