mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various stability fixes for any2lit and any2mobi
This commit is contained in:
commit
2823867347
@ -11,6 +11,7 @@ import sys, struct, cStringIO, os
|
|||||||
import functools
|
import functools
|
||||||
import re
|
import re
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
|
from urllib import unquote as urlunquote
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre.ebooks.lit import LitError
|
from calibre.ebooks.lit import LitError
|
||||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||||
@ -611,6 +612,8 @@ class LitReader(object):
|
|||||||
offset, raw = u32(raw), raw[4:]
|
offset, raw = u32(raw), raw[4:]
|
||||||
internal, raw = consume_sized_utf8_string(raw)
|
internal, raw = consume_sized_utf8_string(raw)
|
||||||
original, raw = consume_sized_utf8_string(raw)
|
original, raw = consume_sized_utf8_string(raw)
|
||||||
|
# The path should be stored unquoted, but not always
|
||||||
|
original = urlunquote(original)
|
||||||
# Is this last one UTF-8 or ASCIIZ?
|
# Is this last one UTF-8 or ASCIIZ?
|
||||||
mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
|
mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
|
||||||
self.manifest[internal] = ManifestItem(
|
self.manifest[internal] = ManifestItem(
|
||||||
|
@ -331,6 +331,13 @@ class Manifest(object):
|
|||||||
def _force_xhtml(self, data):
|
def _force_xhtml(self, data):
|
||||||
if self.oeb.encoding is not None:
|
if self.oeb.encoding is not None:
|
||||||
data = data.decode(self.oeb.encoding, 'replace')
|
data = data.decode(self.oeb.encoding, 'replace')
|
||||||
|
# Handle broken XHTML w/ SVG (ugh)
|
||||||
|
if 'svg:' in data and SVG_NS not in data:
|
||||||
|
data = data.replace(
|
||||||
|
'<html', '<html xmlns:svg="%s"' % SVG_NS, 1)
|
||||||
|
if 'xlink:' in data and XLINK_NS not in data:
|
||||||
|
data = data.replace(
|
||||||
|
'<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1)
|
||||||
try:
|
try:
|
||||||
data = etree.fromstring(data)
|
data = etree.fromstring(data)
|
||||||
except etree.XMLSyntaxError:
|
except etree.XMLSyntaxError:
|
||||||
@ -343,6 +350,24 @@ class Manifest(object):
|
|||||||
data = etree.fromstring(data)
|
data = etree.fromstring(data)
|
||||||
for meta in self.META_XP(data):
|
for meta in self.META_XP(data):
|
||||||
meta.getparent().remove(meta)
|
meta.getparent().remove(meta)
|
||||||
|
head = xpath(data, '/h:html/h:head')
|
||||||
|
head = head[0] if head else None
|
||||||
|
if head is None:
|
||||||
|
self.oeb.logger.warn(
|
||||||
|
'File %r missing <head/> element' % self.href)
|
||||||
|
head = etree.Element(XHTML('head'))
|
||||||
|
data.insert(0, head)
|
||||||
|
title = etree.SubElement(head, XHTML('title'))
|
||||||
|
title.text = self.oeb.translate(__('Unknown'))
|
||||||
|
elif not xpath(data, '/h:html/h:head/h:title'):
|
||||||
|
self.oeb.logger.warn(
|
||||||
|
'File %r missing <title/> element' % self.href)
|
||||||
|
title = etree.SubElement(head, XHTML('title'))
|
||||||
|
title.text = self.oeb.translate(__('Unknown'))
|
||||||
|
if not xpath(data, '/h:html/h:body'):
|
||||||
|
self.oeb.logger.warn(
|
||||||
|
'File %r missing <body/> element' % self.href)
|
||||||
|
etree.SubElement(data, XHTML('body'))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def data():
|
def data():
|
||||||
|
@ -110,7 +110,8 @@ class Stylizer(object):
|
|||||||
|
|
||||||
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
||||||
self.profile = profile
|
self.profile = profile
|
||||||
base = os.path.dirname(path)
|
self.logger = oeb.logger
|
||||||
|
item = oeb.manifest.hrefs[path]
|
||||||
basename = os.path.basename(path)
|
basename = os.path.basename(path)
|
||||||
cssname = os.path.splitext(basename)[0] + '.css'
|
cssname = os.path.splitext(basename)[0] + '.css'
|
||||||
stylesheets = [HTML_CSS_STYLESHEET]
|
stylesheets = [HTML_CSS_STYLESHEET]
|
||||||
@ -128,8 +129,12 @@ class Stylizer(object):
|
|||||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||||
href = urlnormalize(elem.attrib['href'])
|
href = urlnormalize(elem.attrib['href'])
|
||||||
path = os.path.join(base, href)
|
path = item.abshref(href)
|
||||||
path = os.path.normpath(path).replace('\\', '/')
|
if path not in oeb.manifest.hrefs:
|
||||||
|
self.logger.warn(
|
||||||
|
'Stylesheet %r referenced by file %r not in manifest' %
|
||||||
|
(path, item.href))
|
||||||
|
continue
|
||||||
if path in self.STYLESHEETS:
|
if path in self.STYLESHEETS:
|
||||||
stylesheet = self.STYLESHEETS[path]
|
stylesheet = self.STYLESHEETS[path]
|
||||||
else:
|
else:
|
||||||
|
@ -13,6 +13,7 @@ from urlparse import urldefrag
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
import cssutils
|
import cssutils
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
||||||
|
from calibre.ebooks.oeb.base import urlnormalize
|
||||||
|
|
||||||
LINK_SELECTORS = []
|
LINK_SELECTORS = []
|
||||||
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
||||||
@ -46,7 +47,7 @@ class ManifestTrimmer(object):
|
|||||||
item.data is not None:
|
item.data is not None:
|
||||||
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
||||||
for href in chain(*hrefs):
|
for href in chain(*hrefs):
|
||||||
href = item.abshref(href)
|
href = item.abshref(urlnormalize(href))
|
||||||
if href in oeb.manifest.hrefs:
|
if href in oeb.manifest.hrefs:
|
||||||
found = oeb.manifest.hrefs[href]
|
found = oeb.manifest.hrefs[href]
|
||||||
if found not in used:
|
if found not in used:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user