mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Fix #1649 (2). Yet more handling for broken (X)HTML.
This commit is contained in:
parent
3b5b9f1849
commit
0350cd79e3
@@ -350,6 +350,24 @@ class Manifest(object):
|
||||
data = etree.fromstring(data)
|
||||
for meta in self.META_XP(data):
|
||||
meta.getparent().remove(meta)
|
||||
head = xpath(data, '/h:html/h:head')
|
||||
head = head[0] if head else None
|
||||
if head is None:
|
||||
self.oeb.logger.warn(
|
||||
'File %r missing <head/> element' % self.href)
|
||||
head = etree.Element(XHTML('head'))
|
||||
data.insert(0, head)
|
||||
title = etree.SubElement(head, XHTML('title'))
|
||||
title.text = self.oeb.translate(__('Unknown'))
|
||||
elif not xpath(data, '/h:html/h:head/h:title'):
|
||||
self.oeb.logger.warn(
|
||||
'File %r missing <title/> element' % self.href)
|
||||
title = etree.SubElement(head, XHTML('title'))
|
||||
title.text = self.oeb.translate(__('Unknown'))
|
||||
if not xpath(data, '/h:html/h:body'):
|
||||
self.oeb.logger.warn(
|
||||
'File %r missing <body/> element' % self.href)
|
||||
etree.SubElement(data, XHTML('body'))
|
||||
return data
|
||||
|
||||
def data():
|
||||
|
@@ -13,6 +13,7 @@ from urlparse import urldefrag
|
||||
from lxml import etree
|
||||
import cssutils
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
|
||||
LINK_SELECTORS = []
|
||||
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
||||
@@ -46,7 +47,7 @@ class ManifestTrimmer(object):
|
||||
item.data is not None:
|
||||
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
||||
for href in chain(*hrefs):
|
||||
- href = item.abshref(href)
|
||||
+ href = item.abshref(urlnormalize(href))
|
||||
if href in oeb.manifest.hrefs:
|
||||
found = oeb.manifest.hrefs[href]
|
||||
if found not in used:
|
||||
|
Loading…
x
Reference in New Issue
Block a user