From 19a05d4d4f70838853a38c332bd6e4bf31832e97 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 17 Jun 2009 15:04:52 -0700 Subject: [PATCH] Fix #2612 (AttributeError: 'str' object has no attribute 'xpath') --- src/calibre/ebooks/lit/reader.py | 2 +- src/calibre/ebooks/oeb/base.py | 43 +++++++++++++++++++++++++++++--- src/calibre/ebooks/oeb/reader.py | 2 ++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index 37328328b7..cccc04063f 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -370,7 +370,7 @@ class ManifestItem(object): def __init__(self, original, internal, mime_type, offset, root, state): self.original = original self.internal = internal - self.mime_type = mime_type + self.mime_type = mime_type.lower() if hasattr(mime_type, 'lower') else mime_type self.offset = offset self.root = root self.state = state diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 3ece412013..9f43ff7d8b 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -849,6 +849,38 @@ class Manifest(object): return data + def _parse_txt(self, data): + if '' in data: + return self._parse_xhtml(data) + from xml.sax.saxutils import escape + self.oeb.log.debug('Converting', self.href, '...') + paras = [] + lines = [] + for l in data.splitlines(): + if not l: + if lines: + paras.append('

'+'\n'.join(lines)+'

') + lines = [] + lines.append(escape(l)) + + if lines: + paras.append('

'+'\n'.join(lines)+'

') + title = self.oeb.metadata.title + if title: + title = unicode(title[0]) + else: + title = 'No title' + data = '''\ + + %s + %s + + '''%(title, '\n'.join(paras)) + data = self._parse_xhtml(data) + print etree.tostring(data) + return data + + def _parse_css(self, data): self.oeb.log.debug('Parsing', self.href, '...') data = self.oeb.decode(data) @@ -895,12 +927,17 @@ class Manifest(object): data = self._loader(self.href) if not isinstance(data, basestring): pass # already parsed - elif self.media_type in OEB_DOCS: + elif self.media_type.lower() in OEB_DOCS: data = self._parse_xhtml(data) - elif self.media_type[-4:] in ('+xml', '/xml'): + elif self.media_type.lower()[-4:] in ('+xml', '/xml'): data = etree.fromstring(data) - elif self.media_type in OEB_STYLES: + elif self.media_type.lower() in OEB_STYLES: data = self._parse_css(data) + elif 'text' in self.media_type.lower(): + self.oeb.log.warn('%s contains data in TXT format'%self.href, + 'converting to HTML') + data = self._parse_txt(data) + self.media_type = XHTML_MIME self._data = data return data def fset(self, value): diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 89ef6b1ba3..eccc1016bb 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -247,6 +247,8 @@ class OEBReader(object): if media_type is None or media_type == 'text/xml': guessed = guess_type(href)[0] media_type = guessed or media_type or BINARY_MIME + if hasattr(media_type, 'lower'): + media_type = media_type.lower() fallback = elem.get('fallback') if href in manifest.hrefs: self.logger.warn(u'Duplicate manifest entry for %r' % href)