From e2c6d4e5c0eb128c50cae70277cf4c73281d2e86 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 22 Dec 2014 09:30:12 +0530
Subject: [PATCH] FB2 Input: Workaround for broken FB2 files produced by
 convertonlinefree.com. See #1404701 (Does not display some fb2 books)

---
 .../ebooks/conversion/plugins/fb2_input.py     |  3 +++
 src/calibre/ebooks/metadata/fb2.py             | 18 +++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/conversion/plugins/fb2_input.py b/src/calibre/ebooks/conversion/plugins/fb2_input.py
index 61f36fd458..424aa0b7e3 100644
--- a/src/calibre/ebooks/conversion/plugins/fb2_input.py
+++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py
@@ -36,6 +36,7 @@ class FB2Input(InputFormatPlugin):
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         from lxml import etree
+        from calibre.ebooks.metadata.fb2 import ensure_namespace
         from calibre.ebooks.metadata.opf2 import OPFCreator
         from calibre.ebooks.metadata.meta import get_metadata
         from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
@@ -57,10 +58,12 @@ class FB2Input(InputFormatPlugin):
                         parser=RECOVER_PARSER)
         if doc is None:
             raise ValueError('The FB2 file is not valid XML')
+        doc = ensure_namespace(doc)
         try:
             fb_ns = doc.nsmap[doc.prefix]
         except Exception:
             fb_ns = FB2NS
+
         NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
         stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
         css = ''
diff --git a/src/calibre/ebooks/metadata/fb2.py b/src/calibre/ebooks/metadata/fb2.py
index d1b5fe1560..829eb7e852 100644
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@@ -278,7 +278,7 @@ def _get_fbroot(stream):
     raw = stream.read()
     raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
     root = etree.fromstring(raw, parser=parser)
-    return root
+    return ensure_namespace(root)
 
 def _set_title(title_info, mi, ctx):
     if not mi.is_null('title'):
@@ -381,3 +381,19 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
     stream.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
     stream.write(etree.tostring(root, method='xml', encoding='utf-8',
         xml_declaration=False))
+
+def ensure_namespace(doc):
+    '''Return doc with bare (un-namespaced) <description>/<body> children
+    re-parsed so they inherit their parent's namespace. Workaround for broken
+    FB2 files produced by convertonlinefree.com. See
+    https://bugs.launchpad.net/bugs/1404701'''
+    bare_tags = any('{' not in child.tag
+            for name in ('description', 'body') for child in doc.findall(name))
+    if bare_tags:
+        import re
+        # The serialized bare tags carry an empty xmlns="" declaration; strip
+        # it so that they pick up the enclosing default namespace on re-parse
+        raw = etree.tostring(doc, encoding=unicode)
+        raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
+        doc = etree.fromstring(raw)
+    return doc