mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Fix #1649 (2). Yet more handling for broken (X)HTML.
This commit is contained in:
parent
3b5b9f1849
commit
0350cd79e3
@ -350,6 +350,24 @@ class Manifest(object):
|
|||||||
data = etree.fromstring(data)
|
data = etree.fromstring(data)
|
||||||
for meta in self.META_XP(data):
|
for meta in self.META_XP(data):
|
||||||
meta.getparent().remove(meta)
|
meta.getparent().remove(meta)
|
||||||
|
head = xpath(data, '/h:html/h:head')
|
||||||
|
head = head[0] if head else None
|
||||||
|
if head is None:
|
||||||
|
self.oeb.logger.warn(
|
||||||
|
'File %r missing <head/> element' % self.href)
|
||||||
|
head = etree.Element(XHTML('head'))
|
||||||
|
data.insert(0, head)
|
||||||
|
title = etree.SubElement(head, XHTML('title'))
|
||||||
|
title.text = self.oeb.translate(__('Unknown'))
|
||||||
|
elif not xpath(data, '/h:html/h:head/h:title'):
|
||||||
|
self.oeb.logger.warn(
|
||||||
|
'File %r missing <title/> element' % self.href)
|
||||||
|
title = etree.SubElement(head, XHTML('title'))
|
||||||
|
title.text = self.oeb.translate(__('Unknown'))
|
||||||
|
if not xpath(data, '/h:html/h:body'):
|
||||||
|
self.oeb.logger.warn(
|
||||||
|
'File %r missing <body/> element' % self.href)
|
||||||
|
etree.SubElement(data, XHTML('body'))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def data():
|
def data():
|
||||||
|
@ -13,6 +13,7 @@ from urlparse import urldefrag
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
import cssutils
|
import cssutils
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
||||||
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
|
|
||||||
LINK_SELECTORS = []
|
LINK_SELECTORS = []
|
||||||
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
||||||
@ -46,7 +47,7 @@ class ManifestTrimmer(object):
|
|||||||
item.data is not None:
|
item.data is not None:
|
||||||
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
||||||
for href in chain(*hrefs):
|
for href in chain(*hrefs):
|
||||||
href = item.abshref(href)
|
href = item.abshref(urlnormalize(href))
|
||||||
if href in oeb.manifest.hrefs:
|
if href in oeb.manifest.hrefs:
|
||||||
found = oeb.manifest.hrefs[href]
|
found = oeb.manifest.hrefs[href]
|
||||||
if found not in used:
|
if found not in used:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user