From e520f31ed674d1ebbfa750ef6f92950d49ce4fbf Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 8 May 2010 00:24:37 -0600
Subject: [PATCH] CBC Input: Handle comics.txt encoded in UTF-16 with a BOM

---
 src/calibre/ebooks/comic/input.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py
index 3a3cb7d83e..0a05bd2cca 100755
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@@ -341,8 +341,15 @@ class ComicInput(InputFormatPlugin):
             if not os.path.exists('comics.txt'):
                 raise ValueError('%s is not a valid comic collection'
                         %stream.name)
-            raw = open('comics.txt', 'rb').read().decode('utf-8')
-            raw.lstrip(unicode(codecs.BOM_UTF8, "utf8" ))
+            raw = open('comics.txt', 'rb').read()
+            if raw.startswith(codecs.BOM_UTF16_BE):
+                raw = raw.decode('utf-16-be')[1:]
+            elif raw.startswith(codecs.BOM_UTF16_LE):
+                raw = raw.decode('utf-16-le')[1:]
+            elif raw.startswith(codecs.BOM_UTF8):
+                raw = raw.decode('utf-8')[1:]
+            else:
+                raw = raw.decode('utf-8')
             for line in raw.splitlines():
                 line = line.strip()
                 if not line: