From 8523ad9103ae538031fa089d539067909cd5083e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sun, 7 Apr 2013 21:15:42 +0530
Subject: [PATCH] TXT Input: When converting a txt file with a Byte Order Mark,
 remove the Byte Order Mark before further processing as it can cause the
 first line of the text to be misinterpreted.

---
 src/calibre/ebooks/conversion/plugins/txt_input.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/calibre/ebooks/conversion/plugins/txt_input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py
index e916b30c29..50f1409ea6 100644
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@@ -97,6 +97,12 @@ class TXTInput(InputFormatPlugin):
         if not ienc:
             ienc = 'utf-8'
             log.debug('No input encoding specified and could not auto detect using %s' % ienc)
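+        # Remove BOM from the start of txt, as its presence can confuse markdown. NOTE(review): BOM_UTF16_LE is a byte prefix of BOM_UTF32_LE, so the UTF-32 BOMs should be tested first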
+        import codecs
+        for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
+            if txt.startswith(bom):
+                txt = txt[len(bom):]
+                break
         txt = txt.decode(ienc, 'replace')
 
         # Replace entities