mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
FB2 Input: Workaround for broken FB2 files produced by convertonlinefree.com. See #1404701 (Does not display some fb2 books)
This commit is contained in:
parent
555445824e
commit
e2c6d4e5c0
@ -36,6 +36,7 @@ class FB2Input(InputFormatPlugin):
|
||||
def convert(self, stream, options, file_ext, log,
|
||||
accelerators):
|
||||
from lxml import etree
|
||||
from calibre.ebooks.metadata.fb2 import ensure_namespace
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
||||
@ -57,10 +58,12 @@ class FB2Input(InputFormatPlugin):
|
||||
parser=RECOVER_PARSER)
|
||||
if doc is None:
|
||||
raise ValueError('The FB2 file is not valid XML')
|
||||
doc = ensure_namespace(doc)
|
||||
try:
|
||||
fb_ns = doc.nsmap[doc.prefix]
|
||||
except Exception:
|
||||
fb_ns = FB2NS
|
||||
|
||||
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||
css = ''
|
||||
|
@ -278,7 +278,7 @@ def _get_fbroot(stream):
|
||||
raw = stream.read()
|
||||
raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
|
||||
root = etree.fromstring(raw, parser=parser)
|
||||
return root
|
||||
return ensure_namespace(root)
|
||||
|
||||
def _set_title(title_info, mi, ctx):
|
||||
if not mi.is_null('title'):
|
||||
@ -381,3 +381,19 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
|
||||
stream.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
|
||||
stream.write(etree.tostring(root, method='xml', encoding='utf-8',
|
||||
xml_declaration=False))
|
||||
|
||||
def ensure_namespace(doc):
    """Return *doc* with empty ``xmlns`` overrides on description/body removed.

    Workaround for broken FB2 files produced by convertonlinefree.com. See
    https://bugs.launchpad.net/bugs/1404701

    If the root element has any direct ``description`` or ``body`` child whose
    tag carries no namespace, the tree is serialized to text, opening tags of
    the form ``<description xmlns="">`` / ``<body xmlns="">`` are rewritten to
    plain ``<description>`` / ``<body>``, and the text is parsed again.

    :param doc: root element of the parsed FB2 document.
    :return: *doc* itself when no bare tags are found, otherwise the root of
        the newly re-parsed tree.
    """
    # A plain (un-namespaced) name passed to findall() only matches direct
    # children; the '{' test confirms the tag is not in Clark notation.
    bare_tags = any(
        '{' not in child.tag
        for name in ('description', 'body')
        for child in doc.findall(name)
    )
    if bare_tags:
        import re
        # The string 'unicode' works on both Python 2 and Python 3, unlike
        # the Python 2-only ``unicode`` builtin.
        raw = etree.tostring(doc, encoding='unicode')
        raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
        doc = etree.fromstring(raw)
    return doc
|
||||
|
Loading…
x
Reference in New Issue
Block a user