mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
FB2 Input: Make parsing of malformed FB2 files a little more robust
This commit is contained in:
parent
270f46b041
commit
8fb38c3862
@ -178,13 +178,13 @@
|
|||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:if test="$section_toc_id != 'None'">
|
<xsl:if test="$section_toc_id != 'None'">
|
||||||
<xsl:element name="a">
|
<xsl:element name="a">
|
||||||
<xsl:attribute name="name">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
|
<xsl:attribute name="id">TOC_<xsl:value-of select="$section_toc_id"/></xsl:attribute>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<a name="TOC_{generate-id()}"></a>
|
<a name="TOC_{generate-id()}"></a>
|
||||||
<xsl:if test="@id">
|
<xsl:if test="@id">
|
||||||
<xsl:element name="a">
|
<xsl:element name="a">
|
||||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
@ -194,7 +194,7 @@
|
|||||||
<xsl:element name="h6">
|
<xsl:element name="h6">
|
||||||
<xsl:if test="@id">
|
<xsl:if test="@id">
|
||||||
<xsl:element name="a">
|
<xsl:element name="a">
|
||||||
<xsl:attribute name="name"><xsl:value-of select="@id"/></xsl:attribute>
|
<xsl:attribute name="id"><xsl:value-of select="@id"/></xsl:attribute>
|
||||||
</xsl:element>
|
</xsl:element>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
<xsl:apply-templates/>
|
<xsl:apply-templates/>
|
||||||
|
@ -40,14 +40,18 @@ class FB2Input(InputFormatPlugin):
|
|||||||
accelerators):
|
accelerators):
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.meta import get_metadata
|
from calibre.ebooks.metadata.meta import get_metadata
|
||||||
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS
|
from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
|
||||||
NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
|
NAMESPACES = {'f':FB2NS, 'l':XLINK_NS}
|
||||||
log.debug('Parsing XML...')
|
log.debug('Parsing XML...')
|
||||||
raw = stream.read()
|
raw = stream.read().replace('\0', '')
|
||||||
try:
|
try:
|
||||||
doc = etree.fromstring(raw.replace('\0', ''))
|
doc = etree.fromstring(raw)
|
||||||
except etree.XMLSyntaxError:
|
except etree.XMLSyntaxError:
|
||||||
doc = etree.fromstring(raw.replace('& ', '&'))
|
try:
|
||||||
|
doc = etree.fromstring(raw, parser=RECOVER_PARSER)
|
||||||
|
except:
|
||||||
|
doc = etree.fromstring(raw.replace('& ', '&'),
|
||||||
|
parser=RECOVER_PARSER)
|
||||||
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
|
||||||
css = ''
|
css = ''
|
||||||
for s in stylesheets:
|
for s in stylesheets:
|
||||||
|
@ -22,7 +22,8 @@ def get_metadata(stream):
|
|||||||
'xlink':XLINK_NS})
|
'xlink':XLINK_NS})
|
||||||
tostring = lambda x : etree.tostring(x, method='text',
|
tostring = lambda x : etree.tostring(x, method='text',
|
||||||
encoding=unicode).strip()
|
encoding=unicode).strip()
|
||||||
root = etree.fromstring(stream.read())
|
parser = etree.XMLParser(recover=True, no_network=True)
|
||||||
|
root = etree.fromstring(stream.read(), parser=parser)
|
||||||
authors, author_sort = [], None
|
authors, author_sort = [], None
|
||||||
for au in XPath('//fb2:author')(root):
|
for au in XPath('//fb2:author')(root):
|
||||||
fname = lname = author = None
|
fname = lname = author = None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user