mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Fix handling of encoding
This commit is contained in:
parent
5b7416ff84
commit
a69e3991b3
@ -27,11 +27,10 @@ from libprs500.ebooks.markdown import markdown
|
|||||||
def option_parser():
|
def option_parser():
|
||||||
parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
|
parser = lrf_option_parser('''Usage: %prog [options] mybook.txt\n\n'''
|
||||||
'''%prog converts mybook.txt to mybook.lrf''')
|
'''%prog converts mybook.txt to mybook.lrf''')
|
||||||
defenc = 'cp1252' if iswindows else 'utf8'
|
|
||||||
enchelp = 'Set the encoding used to decode ' + \
|
enchelp = 'Set the encoding used to decode ' + \
|
||||||
'the text in mybook.txt. Default encoding is %default'
|
'the text in mybook.txt. Default is to try to autodetect.'
|
||||||
parser.add_option('-e', '--encoding', action='store', type='string', \
|
parser.add_option('-e', '--encoding', action='store', type='string', \
|
||||||
dest='encoding', help=enchelp, default=defenc)
|
dest='encoding', help=enchelp, default=None)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@ -40,27 +39,27 @@ def generate_html(txtfile, encoding):
|
|||||||
Convert txtfile to html and return a PersistentTemporaryFile object pointing
|
Convert txtfile to html and return a PersistentTemporaryFile object pointing
|
||||||
to the file with the HTML.
|
to the file with the HTML.
|
||||||
'''
|
'''
|
||||||
encodings = ['iso-8859-1', 'koi8_r', 'koi8_u', 'utf8']
|
enc = encoding
|
||||||
if iswindows:
|
if not encoding:
|
||||||
encodings = ['cp1252'] + encodings
|
encodings = ['cp1252', 'latin-1', 'iso-8859-1', 'koi8_r', 'koi8_u', 'utf8']
|
||||||
if encoding not in ['cp1252', 'utf8']:
|
txt, enc = None, None
|
||||||
encodings = [encoding] + encodings
|
for encoding in encodings:
|
||||||
txt, enc = None, None
|
try:
|
||||||
for encoding in encodings:
|
txt = codecs.open(txtfile, 'rb', encoding).read()
|
||||||
try:
|
except UnicodeDecodeError:
|
||||||
txt = codecs.open(txtfile, 'rb', encoding).read()
|
continue
|
||||||
except UnicodeDecodeError:
|
enc = encoding
|
||||||
continue
|
break
|
||||||
enc = encoding
|
if txt == None:
|
||||||
break
|
raise ConversionError, 'Could not detect encoding of %s'%(txtfile,)
|
||||||
if txt == None:
|
else:
|
||||||
raise ConversionError, 'Could not detect encoding of %s'%(txtfile,)
|
txt = codecs.open(txtfile, 'rb', enc).read()
|
||||||
md = markdown.Markdown(txt,
|
md = markdown.Markdown(txt,
|
||||||
extensions=['footnotes', 'tables', 'toc'],
|
extensions=['footnotes', 'tables', 'toc'],
|
||||||
encoding=enc,
|
encoding=enc,
|
||||||
safe_mode=False,
|
safe_mode=False,
|
||||||
)
|
)
|
||||||
html = md.toString().decode(enc)
|
html = md.toString()
|
||||||
p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
|
p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
|
||||||
p.close()
|
p.close()
|
||||||
codecs.open(p.name, 'wb', enc).write(html)
|
codecs.open(p.name, 'wb', enc).write(html)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user