mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
FB2 Input: Workaround for broken FB2 files produced by convertonlinefree.com. See #1404701 (Does not display some fb2 books)
This commit is contained in:
parent
555445824e
commit
e2c6d4e5c0
@ -36,6 +36,7 @@ class FB2Input(InputFormatPlugin):
|
|||||||
def convert(self, stream, options, file_ext, log,
|
def convert(self, stream, options, file_ext, log,
|
||||||
accelerators):
|
accelerators):
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
from calibre.ebooks.metadata.fb2 import ensure_namespace
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
||||||
@ -57,10 +58,12 @@ class FB2Input(InputFormatPlugin):
|
|||||||
parser=RECOVER_PARSER)
|
parser=RECOVER_PARSER)
|
||||||
if doc is None:
|
if doc is None:
|
||||||
raise ValueError('The FB2 file is not valid XML')
|
raise ValueError('The FB2 file is not valid XML')
|
||||||
|
doc = ensure_namespace(doc)
|
||||||
try:
|
try:
|
||||||
fb_ns = doc.nsmap[doc.prefix]
|
fb_ns = doc.nsmap[doc.prefix]
|
||||||
except Exception:
|
except Exception:
|
||||||
fb_ns = FB2NS
|
fb_ns = FB2NS
|
||||||
|
|
||||||
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
|
NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
|
||||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||||
css = ''
|
css = ''
|
||||||
|
@ -278,7 +278,7 @@ def _get_fbroot(stream):
|
|||||||
raw = stream.read()
|
raw = stream.read()
|
||||||
raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
|
raw = xml_to_unicode(raw, strip_encoding_pats=True)[0]
|
||||||
root = etree.fromstring(raw, parser=parser)
|
root = etree.fromstring(raw, parser=parser)
|
||||||
return root
|
return ensure_namespace(root)
|
||||||
|
|
||||||
def _set_title(title_info, mi, ctx):
|
def _set_title(title_info, mi, ctx):
|
||||||
if not mi.is_null('title'):
|
if not mi.is_null('title'):
|
||||||
@ -381,3 +381,19 @@ def set_metadata(stream, mi, apply_null=False, update_timestamp=False):
|
|||||||
stream.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
|
stream.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
|
||||||
stream.write(etree.tostring(root, method='xml', encoding='utf-8',
|
stream.write(etree.tostring(root, method='xml', encoding='utf-8',
|
||||||
xml_declaration=False))
|
xml_declaration=False))
|
||||||
|
|
||||||
|
def ensure_namespace(doc):
    """Return *doc* with bare ``<description>``/``<body>`` tags re-namespaced.

    Workaround for broken FB2 files produced by convertonlinefree.com, which
    reset the default namespace on these tags with an empty ``xmlns``
    declaration. See https://bugs.launchpad.net/bugs/1404701

    :param doc: Root element of the parsed FB2 document.
    :return: *doc* itself when no direct ``description``/``body`` child lacks
        a namespace; otherwise a newly parsed root element built from the
        re-serialized document with the empty ``xmlns`` declarations removed
        from those opening tags.
    """
    # A namespaced tag is spelled "{uri}name", so a tag without "{" is one
    # that lives in no namespace at all.
    has_bare_tags = any(
        '{' not in child.tag
        for name in ('description', 'body')
        for child in doc.findall(name)
    )
    if not has_bare_tags:
        return doc

    import re
    # The string 'unicode' (rather than the Python 2 only ``unicode`` builtin)
    # requests text output and works on both Python 2 and Python 3.
    raw = etree.tostring(doc, encoding='unicode')
    # Only strip the declaration when it is the sole attribute of the tag, so
    # the element inherits the default namespace of its parent on re-parse.
    raw = re.sub(r'''<(description|body)\s+xmlns=['"]['"]>''', r'<\1>', raw)
    return etree.fromstring(raw)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user