mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion: Add workaround for converting EPUB files with broken (probably hand-edited) OPF files. See #1389188 (Private bug)
This commit is contained in:
parent
533dcb41d6
commit
d372ffa238
@ -76,7 +76,7 @@ class OEBReader(object):
|
|||||||
for elem in opf.iter(tag=etree.Element):
|
for elem in opf.iter(tag=etree.Element):
|
||||||
nsmap.update(elem.nsmap)
|
nsmap.update(elem.nsmap)
|
||||||
for elem in opf.iter(tag=etree.Element):
|
for elem in opf.iter(tag=etree.Element):
|
||||||
if namespace(elem.tag) in ('', OPF1_NS):
|
if namespace(elem.tag) in ('', OPF1_NS) and ':' not in barename(elem.tag):
|
||||||
elem.tag = OPF(barename(elem.tag))
|
elem.tag = OPF(barename(elem.tag))
|
||||||
nsmap.update(OPF2_NSMAP)
|
nsmap.update(OPF2_NSMAP)
|
||||||
attrib = dict(opf.attrib)
|
attrib = dict(opf.attrib)
|
||||||
@ -90,6 +90,9 @@ class OEBReader(object):
|
|||||||
if namespace(elem.tag) in DC_NSES:
|
if namespace(elem.tag) in DC_NSES:
|
||||||
tag = barename(elem.tag).lower()
|
tag = barename(elem.tag).lower()
|
||||||
elem.tag = '{%s}%s' % (DC11_NS, tag)
|
elem.tag = '{%s}%s' % (DC11_NS, tag)
|
||||||
|
if elem.tag.startswith('dc:'):
|
||||||
|
tag = elem.tag.partition(':')[-1].lower()
|
||||||
|
elem.tag = '{%s}%s' % (DC11_NS, tag)
|
||||||
metadata.append(elem)
|
metadata.append(elem)
|
||||||
for element in xpath(opf, 'o2:metadata//o2:meta'):
|
for element in xpath(opf, 'o2:metadata//o2:meta'):
|
||||||
metadata.append(element)
|
metadata.append(element)
|
||||||
@ -115,8 +118,13 @@ class OEBReader(object):
|
|||||||
data = re.sub(r'(?is)<tours>.+</tours>', '', data)
|
data = re.sub(r'(?is)<tours>.+</tours>', '', data)
|
||||||
data = data.replace('<dc-metadata>',
|
data = data.replace('<dc-metadata>',
|
||||||
'<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')
|
'<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')
|
||||||
opf = etree.fromstring(data)
|
try:
|
||||||
self.logger.warn('OPF contains invalid tours section')
|
opf = etree.fromstring(data)
|
||||||
|
self.logger.warn('OPF contains invalid tours section')
|
||||||
|
except etree.XMLSyntaxError:
|
||||||
|
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
|
||||||
|
opf = etree.fromstring(data, parser=RECOVER_PARSER)
|
||||||
|
self.logger.warn('OPF contains invalid markup, trying to parse it anyway')
|
||||||
|
|
||||||
ns = namespace(opf.tag)
|
ns = namespace(opf.tag)
|
||||||
if ns not in ('', OPF1_NS, OPF2_NS):
|
if ns not in ('', OPF1_NS, OPF2_NS):
|
||||||
@ -691,7 +699,7 @@ class OEBReader(object):
|
|||||||
item = self._find_ncx(opf)
|
item = self._find_ncx(opf)
|
||||||
self._toc_from_opf(opf, item)
|
self._toc_from_opf(opf, item)
|
||||||
self._pages_from_opf(opf, item)
|
self._pages_from_opf(opf, item)
|
||||||
#self._ensure_cover_image()
|
# self._ensure_cover_image()
|
||||||
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
def main(argv=sys.argv):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user