mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Various stability fixes for any2lit and any2mobi
This commit is contained in:
commit
2823867347
@@ -11,6 +11,7 @@ import sys, struct, cStringIO, os
|
||||
import functools
|
||||
import re
|
||||
from urlparse import urldefrag
|
||||
from urllib import unquote as urlunquote
|
||||
from lxml import etree
|
||||
from calibre.ebooks.lit import LitError
|
||||
from calibre.ebooks.lit.maps import OPF_MAP, HTML_MAP
|
||||
@@ -611,6 +612,8 @@ class LitReader(object):
|
||||
offset, raw = u32(raw), raw[4:]
|
||||
internal, raw = consume_sized_utf8_string(raw)
|
||||
original, raw = consume_sized_utf8_string(raw)
|
||||
# The path should be stored unquoted, but not always
|
||||
original = urlunquote(original)
|
||||
# Is this last one UTF-8 or ASCIIZ?
|
||||
mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
|
||||
self.manifest[internal] = ManifestItem(
|
||||
|
@@ -331,6 +331,13 @@ class Manifest(object):
|
||||
def _force_xhtml(self, data):
|
||||
if self.oeb.encoding is not None:
|
||||
data = data.decode(self.oeb.encoding, 'replace')
|
||||
# Handle broken XHTML w/ SVG (ugh)
|
||||
if 'svg:' in data and SVG_NS not in data:
|
||||
data = data.replace(
|
||||
'<html', '<html xmlns:svg="%s"' % SVG_NS, 1)
|
||||
if 'xlink:' in data and XLINK_NS not in data:
|
||||
data = data.replace(
|
||||
'<html', '<html xmlns:xlink="%s"' % XLINK_NS, 1)
|
||||
try:
|
||||
data = etree.fromstring(data)
|
||||
except etree.XMLSyntaxError:
|
||||
@@ -343,6 +350,24 @@ class Manifest(object):
|
||||
data = etree.fromstring(data)
|
||||
for meta in self.META_XP(data):
|
||||
meta.getparent().remove(meta)
|
||||
head = xpath(data, '/h:html/h:head')
|
||||
head = head[0] if head else None
|
||||
if head is None:
|
||||
self.oeb.logger.warn(
|
||||
'File %r missing <head/> element' % self.href)
|
||||
head = etree.Element(XHTML('head'))
|
||||
data.insert(0, head)
|
||||
title = etree.SubElement(head, XHTML('title'))
|
||||
title.text = self.oeb.translate(__('Unknown'))
|
||||
elif not xpath(data, '/h:html/h:head/h:title'):
|
||||
self.oeb.logger.warn(
|
||||
'File %r missing <title/> element' % self.href)
|
||||
title = etree.SubElement(head, XHTML('title'))
|
||||
title.text = self.oeb.translate(__('Unknown'))
|
||||
if not xpath(data, '/h:html/h:body'):
|
||||
self.oeb.logger.warn(
|
||||
'File %r missing <body/> element' % self.href)
|
||||
etree.SubElement(data, XHTML('body'))
|
||||
return data
|
||||
|
||||
def data():
|
||||
|
@@ -110,7 +110,8 @@ class Stylizer(object):
|
||||
|
||||
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
||||
self.profile = profile
|
||||
base = os.path.dirname(path)
|
||||
self.logger = oeb.logger
|
||||
item = oeb.manifest.hrefs[path]
|
||||
basename = os.path.basename(path)
|
||||
cssname = os.path.splitext(basename)[0] + '.css'
|
||||
stylesheets = [HTML_CSS_STYLESHEET]
|
||||
@@ -128,8 +129,12 @@ class Stylizer(object):
|
||||
and elem.get('rel', 'stylesheet') == 'stylesheet' \
|
||||
and elem.get('type', CSS_MIME) in OEB_STYLES:
|
||||
href = urlnormalize(elem.attrib['href'])
|
||||
path = os.path.join(base, href)
|
||||
path = os.path.normpath(path).replace('\\', '/')
|
||||
path = item.abshref(href)
|
||||
if path not in oeb.manifest.hrefs:
|
||||
self.logger.warn(
|
||||
'Stylesheet %r referenced by file %r not in manifest' %
|
||||
(path, item.href))
|
||||
continue
|
||||
if path in self.STYLESHEETS:
|
||||
stylesheet = self.STYLESHEETS[path]
|
||||
else:
|
||||
|
@@ -13,6 +13,7 @@ from urlparse import urldefrag
|
||||
from lxml import etree
|
||||
import cssutils
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
|
||||
LINK_SELECTORS = []
|
||||
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
|
||||
@@ -46,7 +47,7 @@ class ManifestTrimmer(object):
|
||||
item.data is not None:
|
||||
hrefs = [sel(item.data) for sel in LINK_SELECTORS]
|
||||
for href in chain(*hrefs):
|
||||
href = item.abshref(href)
|
||||
href = item.abshref(urlnormalize(href))
|
||||
if href in oeb.manifest.hrefs:
|
||||
found = oeb.manifest.hrefs[href]
|
||||
if found not in used:
|
||||
|
Loading…
x
Reference in New Issue
Block a user