From 60ec978a8f632e9d7b83f05ebe145353b8763f10 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 19 Jun 2012 15:12:55 +0530
Subject: [PATCH] KF8 Input: Handle files with incorrectly encoded guide type
 entries. Fixes #1015020 (azw3 file "invalid start byte")

---
 src/calibre/ebooks/mobi/reader/index.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/mobi/reader/index.py b/src/calibre/ebooks/mobi/reader/index.py
index c732d8862e..6086d547bf 100644
--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@@ -224,7 +224,18 @@ def parse_index_record(table, data, control_byte_count, tags, codec,
     for j in xrange(entry_count):
         start, end = idx_positions[j:j+2]
         rec = data[start:end]
-        ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map)
+        # Sometimes (in the guide table if the type attribute has non-ASCII
+        # values) the ident is UTF-16 encoded. Try to handle that.
+        try:
+            ident, consumed = decode_string(rec, codec=codec, ordt_map=ordt_map)
+        except UnicodeDecodeError:
+            ident, consumed = decode_string(rec, codec='utf-16', ordt_map=ordt_map)
+        if u'\x00' in ident:
+            try:
+                ident, consumed = decode_string(rec, codec='utf-16',
+                        ordt_map=ordt_map)
+            except UnicodeDecodeError:
+                ident = ident.replace('u\x00', u'')
         rec = rec[consumed:]
         tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
         table[ident] = tag_map