mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
FB2 Input: Make parsing of malformed FB2 files a little more robust
This commit is contained in:
parent
270f46b041
commit
8fb38c3862
@ -178,13 +178,13 @@
|
||||
</xsl:if>
|
||||
<xsl:if test="$section_toc_id != 'None'">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
|
||||
<xsl:attribute name="id">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<a name="TOC_{generate-id()}"></a>
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
@ -194,7 +194,7 @@
|
||||
<xsl:element name="h6">
|
||||
<xsl:if test="@id">
|
||||
<xsl:element name="a">
|
||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||
</xsl:element>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
|
@ -40,14 +40,18 @@ class FB2Input(InputFormatPlugin):
|
||||
accelerators):
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.meta import get_metadata
|
||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
||||
NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
|
||||
log.debug('Parsing XML...')
|
||||
raw = stream.read()
|
||||
raw = stream.read().replace('\0', '')
|
||||
try:
|
||||
doc = etree.fromstring(raw.replace('\0', ''))
|
||||
doc = etree.fromstring(raw)
|
||||
except etree.XMLSyntaxError:
|
||||
doc = etree.fromstring(raw.replace('& ', '&'))
|
||||
try:
|
||||
doc = etree.fromstring(raw, parser=RECOVER_PARSER)
|
||||
except:
|
||||
doc = etree.fromstring(raw.replace('& ', '&'),
|
||||
parser=RECOVER_PARSER)
|
||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||
css = ''
|
||||
for s in stylesheets:
|
||||
|
@ -22,7 +22,8 @@ def get_metadata(stream):
|
||||
'xlink':XLINK_NS})
|
||||
tostring = lambda x : etree.tostring(x, method='text',
|
||||
encoding=unicode).strip()
|
||||
root = etree.fromstring(stream.read())
|
||||
parser = etree.XMLParser(recover=True, no_network=True)
|
||||
root = etree.fromstring(stream.read(), parser=parser)
|
||||
authors, author_sort = [], None
|
||||
for au in XPath('//fb2:author')(root):
|
||||
fname = lname = author = None
|
||||
|
Loading…
x
Reference in New Issue
Block a user