FB2 Input: Workaround for broken FB2 files produced by convertonlinefree.com. See #1404701 (Does not display some fb2 books)

This commit is contained in:
Kovid Goyal 2014-12-22 09:30:12 +05:30
parent 555445824e
commit e2c6d4e5c0
2 changed files with 20 additions and 1 deletion

View File

@ -36,6 +36,7 @@ class FB2Input(InputFormatPlugin):
def convert(self, stream, options, file_ext, log,
accelerators):
from lxml import etree
from calibre.ebooks.metadata.fb2 import ensure_namespace
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
@ -57,10 +58,12 @@ class FB2Input(InputFormatPlugin):
parser=RECOVER_PARSER)
if doc is None:
raise ValueError('The FB2 file is not valid XML')
doc = ensure_namespace(doc)
try:
fb_ns = doc.nsmap[doc.prefix]
except Exception:
fb_ns = FB2NS
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
css = ''

View File

@ -278,7 +278,7 @@ def _get_fbroot(stream):
raw = stream.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
root = etree.fromstring(raw, parser=parser)
return root
return ensure_namespace(root)
def _set_title(title_info, mi, ctx):
if not mi.is_null('title'):
@ -381,3 +381,19 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
stream.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
stream.write(etree.tostring(root, method='xml', encoding='utf-8',
xml_declaration=False))
def ensure_namespace(doc):
    """Strip empty ``xmlns`` resets from bare ``description``/``body`` tags.

    Workaround for broken FB2 files produced by convertonlinefree.com. See
    https://bugs.launchpad.net/bugs/1404701

    If *doc* has any direct ``description`` or ``body`` child whose tag
    carries no ``{namespace}`` prefix, the tree is serialized to text, the
    ``xmlns=""`` declaration is removed from those opening tags and the text
    is parsed again. Otherwise *doc* is returned untouched.

    :param doc: root element of the parsed FB2 document
    :return: *doc* itself when nothing needs fixing, otherwise the root
        element of the re-parsed document
    """
    has_bare_tags = any(
        '{' not in child.tag
        for name in ('description', 'body')
        for child in doc.findall(name)
    )
    if not has_bare_tags:
        return doc

    # Imported lazily: only the (rare) broken-file path needs it.
    import re
    raw = etree.tostring(doc, encoding=unicode)
    # NOTE(review): the pattern requires `>` right after the empty xmlns
    # declaration, so an opening tag that carries any further attribute, or
    # that is self-closing, is left unchanged -- confirm this is sufficient.
    raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
    # Presumably the re-parsed elements now inherit the root's default
    # namespace; the default parser is used for the re-parse.
    return etree.fromstring(raw)