This commit is contained in:
Kovid Goyal 2009-02-03 11:20:27 -08:00
commit 072a063be3
6 changed files with 186 additions and 64 deletions

View File

@ -21,6 +21,9 @@ mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs') mimetypes.add_type('text/x-sony-bbeb+xml', '.lrs')
mimetypes.add_type('application/x-sony-bbeb', '.lrf') mimetypes.add_type('application/x-sony-bbeb', '.lrf')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') mimetypes.add_type('application/x-dtbncx+xml', '.ncx')
mimetypes.add_type('application/adobe-page-template+xml', '.xpgt')
mimetypes.add_type('application/x-font-opentype', '.otf')
mimetypes.add_type('application/x-font-truetype', '.ttf')
def to_unicode(raw, encoding='utf-8', errors='strict'): def to_unicode(raw, encoding='utf-8', errors='strict'):
if isinstance(raw, unicode): if isinstance(raw, unicode):

View File

@ -23,7 +23,7 @@ from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
from calibre.ebooks.lit.reader import DirectoryEntry from calibre.ebooks.lit.reader import DirectoryEntry
import calibre.ebooks.lit.maps as maps import calibre.ebooks.lit.maps as maps
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \ from calibre.ebooks.oeb.base import OEB_DOCS, XHTML_MIME, OEB_STYLES, \
CSS_MIME, OPF_MIME, XML_NS, XML CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \ from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
urlnormalize, xpath urlnormalize, xpath
@ -474,7 +474,7 @@ class LitWriter(object):
name = '/data/' + item.id name = '/data/' + item.id
data = item.data data = item.data
secnum = 0 secnum = 0
if not isinstance(data, basestring): if isinstance(data, etree._Element):
self._add_folder(name) self._add_folder(name)
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP) rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/ahc', rebin.ahc, 0)
@ -483,6 +483,8 @@ class LitWriter(object):
data = rebin.content data = rebin.content
name = name + '/content' name = name + '/content'
secnum = 1 secnum = 1
elif isinstance(data, unicode):
data = data.encode('utf-8')
self._add_file(name, data, secnum) self._add_file(name, data, secnum)
item.size = len(data) item.size = len(data)
@ -493,7 +495,7 @@ class LitWriter(object):
if item.spine_position is not None: if item.spine_position is not None:
key = 'linear' if item.linear else 'nonlinear' key = 'linear' if item.linear else 'nonlinear'
manifest[key].append(item) manifest[key].append(item)
elif item.media_type == CSS_MIME: elif item.media_type in OEB_STYLES:
manifest['css'].append(item) manifest['css'].append(item)
elif item.media_type in LIT_IMAGES: elif item.media_type in LIT_IMAGES:
manifest['images'].append(item) manifest['images'].append(item)
@ -506,6 +508,11 @@ class LitWriter(object):
data.write(pack('<I', len(items))) data.write(pack('<I', len(items)))
for item in items: for item in items:
id, media_type = item.id, item.media_type id, media_type = item.id, item.media_type
if media_type in OEB_DOCS:
# Needs to have 'html' in media-type
media_type = XHTML_MIME
elif media_type in OEB_STYLES:
media_type = CSS_MIME
href = urlunquote(item.href) href = urlunquote(item.href)
item.offset = offset \ item.offset = offset \
if state in ('linear', 'nonlinear') else 0 if state in ('linear', 'nonlinear') else 0
@ -525,7 +532,12 @@ class LitWriter(object):
pb3 = StringIO() pb3 = StringIO()
pb3cur = 0 pb3cur = 0
bits = 0 bits = 0
linear = []
nonlinear = []
for item in self._oeb.spine: for item in self._oeb.spine:
dest = linear if item.linear else nonlinear
dest.append(item)
for item in chain(linear, nonlinear):
page_breaks = copy.copy(item.page_breaks) page_breaks = copy.copy(item.page_breaks)
if not item.linear: if not item.linear:
page_breaks.insert(0, (0, [])) page_breaks.insert(0, (0, []))

View File

@ -62,6 +62,16 @@ def SVG(name): return '{%s}%s' % (SVG_NS, name)
def XLINK(name): return '{%s}%s' % (XLINK_NS, name) def XLINK(name): return '{%s}%s' % (XLINK_NS, name)
def CALIBRE(name): return '{%s}%s' % (CALIBRE_NS, name) def CALIBRE(name): return '{%s}%s' % (CALIBRE_NS, name)
def LINK_SELECTORS():
    """Compile and return the XPath selectors used to find link attributes.

    Each expression selects an attribute value (``@href``, ``@src`` or
    ``@data``); all are compiled against the ``XPNSMAP`` prefix map.
    """
    # Locals stay confined to this one-shot builder function.
    expressions = (
        'h:head/h:link/@href', 'h:body//h:a/@href',
        'h:body//h:img/@src', 'h:body//h:object/@data',
        'h:body//*/@xl:href', '//ncx:content/@src',
        'o2:page/@href',
    )
    return [etree.XPath(expr, namespaces=XPNSMAP) for expr in expressions]
# Rebind the name to the compiled list; the builder function is discarded.
LINK_SELECTORS = LINK_SELECTORS()
EPUB_MIME = 'application/epub+zip' EPUB_MIME = 'application/epub+zip'
XHTML_MIME = 'application/xhtml+xml' XHTML_MIME = 'application/xhtml+xml'
CSS_MIME = 'text/css' CSS_MIME = 'text/css'
@ -89,6 +99,10 @@ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
QNAME_RE = re.compile(r'^[{][^{}]+[}][^{}]+$') QNAME_RE = re.compile(r'^[{][^{}]+[}][^{}]+$')
PREFIXNAME_RE = re.compile(r'^[^:]+[:][^:]+') PREFIXNAME_RE = re.compile(r'^[^:]+[:][^:]+')
XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>') XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
CSSURL_RE = re.compile(r'''url[(](?P<q>["']?)(?P<url>[^)]+)(?P=q)[)]''')
RECOVER_PARSER = etree.XMLParser(recover=True)
def element(parent, *args, **kwargs): def element(parent, *args, **kwargs):
if parent is not None: if parent is not None:
@ -140,14 +154,17 @@ def xml2str(root):
return etree.tostring(root, encoding='utf-8', xml_declaration=True) return etree.tostring(root, encoding='utf-8', xml_declaration=True)
ASCII_CHARS = set(chr(x) for x in xrange(128)) ASCII_CHARS = set(chr(x) for x in xrange(128))
URL_SAFE = set(u'ABCDEFGHIJKLMNOPQRSTUVWXYZ' UNIBYTE_CHARS = set(chr(x) for x in xrange(256))
u'abcdefghijklmnopqrstuvwxyz' URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
u'0123456789' u'_.-/~') 'abcdefghijklmnopqrstuvwxyz'
URL_UNSAFE = ASCII_CHARS - URL_SAFE '0123456789' '_.-/~')
URL_UNSAFE = [ASCII_CHARS - URL_SAFE, UNIBYTE_CHARS - URL_SAFE]
def urlquote(href):
    """Return `href` with every unsafe character replaced by ``%xx``.

    The set of unsafe characters is taken from ``URL_UNSAFE``: entry 0 is
    used when `href` is a ``unicode`` object, entry 1 for anything else.
    Characters outside the selected set are passed through untouched.
    """
    table = URL_UNSAFE[0 if isinstance(href, unicode) else 1]
    return ''.join(
        "%%%02x" % ord(char) if char in table else char
        for char in href)
@ -185,7 +202,7 @@ class AbstractContainer(object):
class DirContainer(AbstractContainer): class DirContainer(AbstractContainer):
def __init__(self, rootdir): def __init__(self, rootdir):
self.rootdir = rootdir self.rootdir = unicode(rootdir)
def read(self, path): def read(self, path):
path = os.path.join(self.rootdir, path) path = os.path.join(self.rootdir, path)
@ -205,16 +222,23 @@ class DirContainer(AbstractContainer):
return os.path.isfile(urlunquote(path)) return os.path.isfile(urlunquote(path))
class DirWriter(object):
    """Write an OEB book's manifest items and OPF metadata into a directory."""

    def __init__(self, version='2.0', page_map=False):
        """Remember the OPF `version` to emit and the `page_map` flag.

        `version` may be a string such as ``'2.0'`` or a number such as
        ``2`` / ``2.0``; only its leading digit is significant.
        """
        self.version = version
        self.page_map = page_map

    def dump(self, oeb, path):
        """Serialize `oeb` into the directory `path`, creating it if needed.

        Every manifest item is written under its href, followed by the
        metadata documents produced by ``oeb.to_opf1()`` or
        ``oeb.to_opf2(page_map=...)`` depending on the major version.

        Raises OEBError if the major version is neither 1 nor 2.
        """
        # str() keeps numeric versions (the former default was the float
        # 2.0) working instead of failing on the subscript.
        version = int(str(self.version)[0])
        # Reject unsupported versions before touching the filesystem so a
        # bad version does not leave a half-written directory behind.
        if version not in (1, 2):
            raise OEBError("Unrecognized OPF version %r" % self.version)
        if not os.path.isdir(path):
            os.mkdir(path)
        output = DirContainer(path)
        for item in oeb.manifest.values():
            output.write(item.href, str(item))
        if version == 1:
            metadata = oeb.to_opf1()
        else:
            metadata = oeb.to_opf2(page_map=self.page_map)
        for href, data in metadata.values():
            output.write(href, xml2str(data))
        return
@ -455,7 +479,6 @@ class Manifest(object):
# Convert to Unicode and normalize line endings # Convert to Unicode and normalize line endings
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = XMLDECL_RE.sub('', data) data = XMLDECL_RE.sub('', data)
data = data.replace('\r\n', '\n').replace('\r', '\n')
# Handle broken XHTML w/ SVG (ugh) # Handle broken XHTML w/ SVG (ugh)
if 'svg:' in data and SVG_NS not in data: if 'svg:' in data and SVG_NS not in data:
data = data.replace( data = data.replace(
@ -480,7 +503,10 @@ class Manifest(object):
if elem.text: if elem.text:
elem.text = elem.text.strip('-') elem.text = elem.text.strip('-')
data = etree.tostring(data, encoding=unicode) data = etree.tostring(data, encoding=unicode)
data = etree.fromstring(data) try:
data = etree.fromstring(data)
except etree.XMLSyntaxError:
data = etree.fromstring(data, parser=RECOVER_PARSER)
# Force into the XHTML namespace # Force into the XHTML namespace
if barename(data.tag) != 'html': if barename(data.tag) != 'html':
raise OEBError( raise OEBError(
@ -536,6 +562,8 @@ class Manifest(object):
data = self._force_xhtml(data) data = self._force_xhtml(data)
elif self.media_type[-4:] in ('+xml', '/xml'): elif self.media_type[-4:] in ('+xml', '/xml'):
data = etree.fromstring(data) data = etree.fromstring(data)
elif self.media_type in OEB_STYLES:
data = self.oeb.decode(data)
self._data = data self._data = data
return data return data
def fset(self, value): def fset(self, value):
@ -549,6 +577,8 @@ class Manifest(object):
data = self.data data = self.data
if isinstance(data, etree._Element): if isinstance(data, etree._Element):
return xml2str(data) return xml2str(data)
if isinstance(data, unicode):
return data.encode('utf-8')
return str(data) return str(data)
def __eq__(self, other): def __eq__(self, other):
@ -572,7 +602,9 @@ class Manifest(object):
return cmp(skey, okey) return cmp(skey, okey)
def relhref(self, href): def relhref(self, href):
if '/' not in self.href or ':' in href: if urlparse(href).scheme:
return href
if '/' not in self.href:
return href return href
base = os.path.dirname(self.href).split('/') base = os.path.dirname(self.href).split('/')
target, frag = urldefrag(href) target, frag = urldefrag(href)
@ -588,7 +620,12 @@ class Manifest(object):
return relhref return relhref
def abshref(self, href): def abshref(self, href):
if '/' not in self.href or ':' in href: if urlparse(href).scheme:
return href
path, frag = urldefrag(href)
if not path:
return '#'.join((self.href, frag))
if '/' not in self.href:
return href return href
dirname = os.path.dirname(self.href) dirname = os.path.dirname(self.href)
href = os.path.join(dirname, href) href = os.path.join(dirname, href)
@ -615,18 +652,20 @@ class Manifest(object):
if item in self.oeb.spine: if item in self.oeb.spine:
self.oeb.spine.remove(item) self.oeb.spine.remove(item)
def generate(self, id=None, href=None):
    """Return an ``(id, href)`` pair not already present in the manifest.

    A numeric suffix (1, 2, ...) is appended to `id` until it is absent
    from ``self.ids``. `href` is normalized with ``urlnormalize`` and the
    suffix is inserted before its extension until it is absent from
    ``self.hrefs``. Either argument may be None, in which case it is
    returned unchanged as None.
    """
    if id is not None:
        stem, suffix = id, 0
        while id in self.ids:
            suffix += 1
            id = stem + str(suffix)
    if href is not None:
        href = urlnormalize(href)
        root, ext = os.path.splitext(href)
        suffix = 0
        while href in self.hrefs:
            suffix += 1
            href = root + str(suffix) + ext
    return id, href
def __iter__(self): def __iter__(self):
@ -996,13 +1035,11 @@ class OEBBook(object):
metadata = etree.SubElement(nroot, OPF('metadata'), nsmap=nsmap) metadata = etree.SubElement(nroot, OPF('metadata'), nsmap=nsmap)
ignored = (OPF('dc-metadata'), OPF('x-metadata')) ignored = (OPF('dc-metadata'), OPF('x-metadata'))
for elem in xpath(opf, 'o2:metadata//*'): for elem in xpath(opf, 'o2:metadata//*'):
if elem.tag in ignored:
continue
if namespace(elem.tag) in DC_NSES: if namespace(elem.tag) in DC_NSES:
tag = barename(elem.tag).lower() tag = barename(elem.tag).lower()
elem.tag = '{%s}%s' % (DC11_NS, tag) elem.tag = '{%s}%s' % (DC11_NS, tag)
for name in elem.attrib:
if name in ('role', 'file-as', 'scheme', 'event'):
nsname = '{%s}%s' % (OPF2_NS, name)
elem.attrib[nsname] = elem.attrib.pop(name)
metadata.append(elem) metadata.append(elem)
for element in xpath(opf, 'o2:metadata//o2:meta'): for element in xpath(opf, 'o2:metadata//o2:meta'):
metadata.append(element) metadata.append(element)
@ -1015,7 +1052,6 @@ class OEBBook(object):
data = self.container.read(opfpath) data = self.container.read(opfpath)
data = self.decode(data) data = self.decode(data)
data = XMLDECL_RE.sub('', data) data = XMLDECL_RE.sub('', data)
data = data.replace('\r\n', '\n').replace('\r', '\n')
try: try:
opf = etree.fromstring(data) opf = etree.fromstring(data)
except etree.XMLSyntaxError: except etree.XMLSyntaxError:
@ -1077,6 +1113,45 @@ class OEBBook(object):
if not metadata.title: if not metadata.title:
self.logger.warn('Title not specified') self.logger.warn('Title not specified')
metadata.add('title', self.translate(__('Unknown'))) metadata.add('title', self.translate(__('Unknown')))
def _manifest_add_missing(self):
    """Add files that manifest items reference but the manifest omits.

    Items whose media type is in OEB_DOCS or ends in ``/xml`` / ``+xml``
    (and whose data is not None) are scanned with LINK_SELECTORS; items
    whose media type is in OEB_STYLES are scanned with CSSURL_RE. Each
    referenced local href that is not yet known is looked up in the
    container: if it exists it is added to the manifest (with a guessed
    media type, falling back to BINARY_MIME) and scanned in turn;
    otherwise a warning is logged.
    """
    manifest = self.manifest
    known = set(manifest.hrefs)
    unchecked = set(manifest.values())
    while unchecked:
        new = set()
        for item in unchecked:
            if (item.media_type in OEB_DOCS or
                item.media_type[-4:] in ('/xml', '+xml')) and \
               item.data is not None:
                hrefs = [sel(item.data) for sel in LINK_SELECTORS]
                candidates = chain(*hrefs)
            elif item.media_type in OEB_STYLES:
                candidates = (match.group('url')
                              for match in CSSURL_RE.finditer(item.data))
            else:
                continue
            for href in candidates:
                href, _ = urldefrag(href)
                # Fragment-only references (e.g. ``url(#id)`` in CSS or
                # ``href="#top"``) point into the item itself; skip them
                # in both branches rather than resolving an empty path.
                if not href:
                    continue
                href = item.abshref(urlnormalize(href))
                scheme = urlparse(href).scheme
                if not scheme and href not in known:
                    new.add(href)
        unchecked.clear()
        for href in new:
            # Mark as known even when missing so it is reported only once.
            known.add(href)
            if not self.container.exists(href):
                self.logger.warn('Referenced file %r not found' % href)
                continue
            self.logger.warn('Referenced file %r not in manifest' % href)
            new_id, _ = manifest.generate(id='added')
            guessed = mimetypes.guess_type(href)[0]
            media_type = guessed or BINARY_MIME
            added = manifest.add(new_id, href, media_type)
            # Newly added items may reference further unlisted files.
            unchecked.add(added)
def _manifest_from_opf(self, opf): def _manifest_from_opf(self, opf):
self.manifest = manifest = Manifest(self) self.manifest = manifest = Manifest(self)
@ -1100,6 +1175,40 @@ class OEBBook(object):
self.logger.warn(u'Duplicate manifest id %r' % id) self.logger.warn(u'Duplicate manifest id %r' % id)
id, href = manifest.generate(id, href) id, href = manifest.generate(id, href)
manifest.add(id, href, media_type, fallback) manifest.add(id, href, media_type, fallback)
self._manifest_add_missing()
def _spine_add_extra(self):
    """Append to the spine, as non-linear, documents linked from the spine.

    Starting from the current spine items, follows ``h:body//h:a/@href``
    links through items whose media type is in OEB_DOCS and collects every
    linked manifest document that is not already in the spine. The
    collected items are added in sorted order with ``linear=False``; a
    warning is logged for each when the major version is 2 or higher.
    """
    manifest = self.manifest
    spine = self.spine
    pending = set(spine)
    anchor_hrefs = XPath('h:body//h:a/@href')
    extras = set()
    while pending:
        discovered = set()
        for item in pending:
            if item.media_type not in OEB_DOCS:
                # TODO: handle fallback chains
                continue
            for raw in anchor_hrefs(item.data):
                path, _ = urldefrag(raw)
                if not path:
                    continue
                target = item.abshref(urlnormalize(path))
                if target not in manifest.hrefs:
                    continue
                found = manifest.hrefs[target]
                if found.media_type in OEB_DOCS and \
                   found not in spine and found not in extras:
                    discovered.add(found)
        extras.update(discovered)
        # Only items found in this pass still need their links followed.
        pending = discovered
    version = int(self.version[0])
    for item in sorted(extras):
        if version >= 2:
            self.logger.warn(
                'Spine-referenced file %r not in spine' % item.href)
        spine.add(item, linear=False)
def _spine_from_opf(self, opf): def _spine_from_opf(self, opf):
self.spine = spine = Spine(self) self.spine = spine = Spine(self)
@ -1110,16 +1219,9 @@ class OEBBook(object):
continue continue
item = self.manifest[idref] item = self.manifest[idref]
spine.add(item, elem.get('linear')) spine.add(item, elem.get('linear'))
extras = []
for item in self.manifest.values():
if item.media_type in OEB_DOCS \
and item not in spine:
extras.append(item)
extras.sort()
for item in extras:
spine.add(item, False)
if len(spine) == 0: if len(spine) == 0:
raise OEBError("Spine is empty") raise OEBError("Spine is empty")
self._spine_add_extra()
def _guide_from_opf(self, opf): def _guide_from_opf(self, opf):
self.guide = guide = Guide(self) self.guide = guide = Guide(self)
@ -1189,12 +1291,11 @@ class OEBBook(object):
href = site.get('href') href = site.get('href')
if not title or not href: if not title or not href:
continue continue
href = item.abshref(urlnormalize(href)) path, _ = urldefrag(urlnormalize(href))
path, _ = urldefrag(href)
if path not in self.manifest.hrefs: if path not in self.manifest.hrefs:
self.logger.warn('TOC reference %r not found' % href) self.logger.warn('TOC reference %r not found' % href)
continue continue
id = child.get('id') id = site.get('id')
toc.add(title, href, id=id) toc.add(title, href, id=id)
return True return True
@ -1217,12 +1318,12 @@ class OEBBook(object):
order = [] order = []
for anchor in xpath(html, './/h:a[@href]'): for anchor in xpath(html, './/h:a[@href]'):
href = anchor.attrib['href'] href = anchor.attrib['href']
href = item.abshref(urlnormalize(href))
path, frag = urldefrag(href) path, frag = urldefrag(href)
if not path: if path not in self.manifest.hrefs:
href = '#'.join((itempath, frag)) continue
title = ' '.join(xpath(anchor, './/text()')) title = ' '.join(xpath(anchor, './/text()'))
title = COLLAPSE_RE.sub(' ', title.strip()) title = COLLAPSE_RE.sub(' ', title.strip())
href = urlnormalize(href)
if href not in titles: if href not in titles:
order.append(href) order.append(href)
titles[href].append(title) titles[href].append(title)
@ -1313,7 +1414,12 @@ class OEBBook(object):
continue continue
name = COLLAPSE_RE.sub(' ', name.strip()) name = COLLAPSE_RE.sub(' ', name.strip())
href = item.abshref(urlnormalize(href)) href = item.abshref(urlnormalize(href))
pages.add(name, href) type = 'normal'
if not name:
type = 'special'
elif name.lower().strip('ivxlcdm') == '':
type = 'front'
pages.add(name, href, type=type)
return True return True
def _pages_from_opf(self, opf, item): def _pages_from_opf(self, opf, item):
@ -1337,8 +1443,10 @@ class OEBBook(object):
if self.metadata.cover: if self.metadata.cover:
id = str(self.metadata.cover[0]) id = str(self.metadata.cover[0])
item = self.manifest.ids.get(id, None) item = self.manifest.ids.get(id, None)
if item is not None: if item is not None and item.media_type in OEB_IMAGES:
return item return item
else:
self.logger.warn('Invalid cover image @id %r' % id)
hcover = self.spine[0] hcover = self.spine[0]
if 'cover' in self.guide: if 'cover' in self.guide:
href = self.guide['cover'].href href = self.guide['cover'].href
@ -1376,6 +1484,7 @@ class OEBBook(object):
self.metadata.add('cover', cover.id) self.metadata.add('cover', cover.id)
def _all_from_opf(self, opf): def _all_from_opf(self, opf):
self.version = opf.get('version', '1.2')
self._metadata_from_opf(opf) self._metadata_from_opf(opf)
self._manifest_from_opf(opf) self._manifest_from_opf(opf)
self._spine_from_opf(opf) self._spine_from_opf(opf)
@ -1384,7 +1493,7 @@ class OEBBook(object):
self._toc_from_opf(opf, item) self._toc_from_opf(opf, item)
self._pages_from_opf(opf, item) self._pages_from_opf(opf, item)
self._ensure_cover_image() self._ensure_cover_image()
def translate(self, text): def translate(self, text):
lang = str(self.metadata.language[0]) lang = str(self.metadata.language[0])
lang = lang.split('-', 1)[0].lower() lang = lang.split('-', 1)[0].lower()
@ -1408,6 +1517,8 @@ class OEBBook(object):
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
data, _ = xml_to_unicode(data) data, _ = xml_to_unicode(data)
data = data.replace('\r\n', '\n')
data = data.replace('\r', '\n')
return data return data
def to_opf1(self): def to_opf1(self):
@ -1447,7 +1558,8 @@ class OEBBook(object):
next += 1 next += 1
selector = XPath('ncx:content/@src') selector = XPath('ncx:content/@src')
for elem in xpath(ncx, '//*[@playOrder and ./ncx:content[@src]]'): for elem in xpath(ncx, '//*[@playOrder and ./ncx:content[@src]]'):
order = playorder[selector(elem)[0]] href = selector(elem)[0]
order = playorder.get(href, 0)
elem.attrib['playOrder'] = str(order) elem.attrib['playOrder'] = str(order)
return return

View File

@ -172,9 +172,8 @@ class Stylizer(object):
if path not in hrefs: if path not in hrefs:
return (None, None) return (None, None)
data = hrefs[path].data data = hrefs[path].data
data = self.oeb.decode(data)
data = XHTML_CSS_NAMESPACE + data data = XHTML_CSS_NAMESPACE + data
return (None, data) return ('utf-8', data)
def flatten_rule(self, rule, href, index): def flatten_rule(self, rule, href, index):
results = [] results = []

View File

@ -13,13 +13,9 @@ from urlparse import urldefrag
from lxml import etree from lxml import etree
import cssutils import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
LINK_SELECTORS = []
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
'//*/@xl:href'):
LINK_SELECTORS.append(etree.XPath(expr, namespaces=XPNSMAP))
class ManifestTrimmer(object): class ManifestTrimmer(object):
def transform(self, oeb, context): def transform(self, oeb, context):
oeb.logger.info('Trimming unused files from manifest...') oeb.logger.info('Trimming unused files from manifest...')
@ -53,15 +49,13 @@ class ManifestTrimmer(object):
if found not in used: if found not in used:
new.add(found) new.add(found)
elif item.media_type == CSS_MIME: elif item.media_type == CSS_MIME:
def replacer(uri): for match in CSSURL_RE.finditer(item.data):
absuri = item.abshref(urlnormalize(uri)) href = match.group('url')
if absuri in oeb.manifest.hrefs: href = item.abshref(urlnormalize(href))
if href in oeb.manifest.hrefs:
found = oeb.manifest.hrefs[href] found = oeb.manifest.hrefs[href]
if found not in used: if found not in used:
new.add(found) new.add(found)
return uri
sheet = cssutils.parseString(item.data, href=item.href)
cssutils.replaceUrls(sheet, replacer)
used.update(new) used.update(new)
unchecked = new unchecked = new
for item in oeb.manifest.values(): for item in oeb.manifest.values():

View File

@ -840,7 +840,9 @@ def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
try: try:
# encoding may still be wrong if encoding *is lying*! # encoding may still be wrong if encoding *is lying*!
if content is not None: if isinstance(content, unicode):
decodedCssText = content
elif content is not None:
decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0] decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0]
else: else:
decodedCssText = None decodedCssText = None