mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXT Input: When converting a txt file with a Byte Order Mark, remove the Byte Order Mark before further processing as it can cause the first line of the text to be mis-interpreted.
This commit is contained in:
parent
a1fd361f81
commit
8523ad9103
@ -97,6 +97,12 @@ class TXTInput(InputFormatPlugin):
|
||||
if not ienc:
|
||||
ienc = 'utf-8'
|
||||
log.debug('No input encoding specified and could not auto detect using %s' % ienc)
|
||||
# Remove BOM from start of txt as its presence can confuse markdown
|
||||
import codecs
|
||||
for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
|
||||
if txt.startswith(bom):
|
||||
txt = txt[len(bom):]
|
||||
break
|
||||
txt = txt.decode(ienc, 'replace')
|
||||
|
||||
# Replace entities
|
||||
|
Loading…
x
Reference in New Issue
Block a user