From 7a6afe4ee47a1fb4ef379450ccd49b56b9b457c1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 24 Apr 2009 20:21:37 -0700 Subject: [PATCH] Fix various regressions --- src/calibre/ebooks/mobi/palmdoc.c | 4 ++-- src/calibre/ebooks/mobi/reader.py | 22 ++++++++++++++++------ src/calibre/ebooks/oeb/base.py | 2 ++ src/calibre/ebooks/oeb/transforms/guide.py | 6 ++++-- src/calibre/ebooks/oeb/transforms/split.py | 9 +++------ 5 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/calibre/ebooks/mobi/palmdoc.c b/src/calibre/ebooks/mobi/palmdoc.c index 87c6c32da8..29e9579140 100644 --- a/src/calibre/ebooks/mobi/palmdoc.c +++ b/src/calibre/ebooks/mobi/palmdoc.c @@ -118,9 +118,9 @@ cpalmdoc_do_compress(buffer *b, char *output) { found = false; for (chunk_len = 10; chunk_len > 2; chunk_len--) { j = cpalmdoc_rfind(b->data, i, chunk_len); - if (j < i) { + dist = i - j; + if (j < i && dist <= 2047) { found = true; - dist = i - j; compound = (dist << 3) + chunk_len-3; *(output++) = CHAR(0x80 + (compound >> 8 )); *(output++) = CHAR(compound & 0xFF); diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index b68263ab28..38de3476d1 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -226,7 +226,7 @@ class MobiReader(object): page-break-after: always; margin: 0; display: block } ''') - self.tag_css_rules = [] + self.tag_css_rules = {} if hasattr(filename_or_stream, 'read'): stream = filename_or_stream @@ -328,10 +328,10 @@ class MobiReader(object): with open('styles.css', 'wb') as s: s.write(self.base_css_rules+'\n\n') - for rule in self.tag_css_rules: + for cls, rule in self.tag_css_rules.items(): if isinstance(rule, unicode): rule = rule.encode('utf-8') - s.write(rule+'\n\n') + s.write('.%s { %s }\n\n'%(cls, rule)) if self.book_header.exth is not None or self.embedded_mi is not None: @@ -389,6 +389,7 @@ class MobiReader(object): 'xx-large' : '6', } mobi_version = self.book_header.mobi_version + style_map = {} for i, tag in enumerate(root.iter(etree.Element)): if tag.tag in ('country-region', 'place', 'placetype', 'placename', 'state', 'city', 'street', 'address', 'content'): @@ -455,9 +456,18 @@ class MobiReader(object): except ValueError: pass if styles: - attrib['id'] = attrib.get('id', 'calibre_mr_gid%d'%i) - self.tag_css_rules.append('#%s {%s}'%(attrib['id'], - '; '.join(styles))) + cls = None + rule = '; '.join(styles) + for sel, srule in self.tag_css_rules.items(): + if srule == rule: + cls = sel + break + if cls is None: + ncls = 'calibre_%d'%i + self.tag_css_rules[ncls] = rule + cls = attrib.get('class', '') + cls = cls + (' ' if cls else '') + ncls + attrib['class'] = cls def create_opf(self, htmlfile, guide=None, root=None): mi = getattr(self.book_header.exth, 'mi', self.embedded_mi) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 81120aaf2e..783f09e5cc 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -726,6 +726,7 @@ class Manifest(object): % (self.id, self.href, self.media_type) def _parse_xhtml(self, data): + self.oeb.log.debug('Parsing', self.href, '...') # Convert to Unicode and normalize line endings data = self.oeb.decode(data) data = self.oeb.html_preprocessor(data) @@ -804,6 +805,7 @@ class Manifest(object): return data def _parse_css(self, data): + self.oeb.log.debug('Parsing', self.href, '...') data = self.oeb.decode(data) data = self.oeb.css_preprocessor(data) data = XHTML_CSS_NAMESPACE + data diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py index 00830b1a8c..dc7123446b 100644 --- a/src/calibre/ebooks/oeb/transforms/guide.py +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -41,10 +41,12 @@ class Clean(object): for x in list(self.oeb.guide): href = urldefrag(self.oeb.guide[x].href)[0] - if x.lower() != ('cover', 'titlepage'): + if x.lower() not in ('cover', 'titlepage'): try: if href not in protected_hrefs: - self.oeb.manifest.remove(self.oeb.manifest.hrefs[href]) + item = self.oeb.manifest.hrefs[href] + if item not in self.oeb.spine: + self.oeb.manifest.remove(self.oeb.manifest.hrefs[href]) except KeyError: pass self.oeb.guide.remove(x) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index d3505a5fd9..21d71da5bb 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -44,14 +44,14 @@ class Split(object): self.split_on_page_breaks = split_on_page_breaks self.page_breaks_xpath = page_breaks_xpath self.max_flow_size = max_flow_size + self.page_break_selectors = None if self.page_breaks_xpath is not None: - self.page_breaks_xpath = XPath(self.page_breaks_xpath) + self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)] def __call__(self, oeb, context): self.oeb = oeb self.log = oeb.log self.map = {} - self.page_break_selectors = None for item in list(self.oeb.manifest.items): if item.spine_position is not None and etree.iselement(item.data): self.split_item(item) @@ -60,10 +60,7 @@ class Split(object): def split_item(self, item): if self.split_on_page_breaks: - if self.page_breaks_xpath is None: - page_breaks, page_break_ids = self.find_page_breaks(item) - else: - page_breaks, page_break_ids = self.page_breaks_xpath(item.data) + page_breaks, page_break_ids = self.find_page_breaks(item) splitter = FlowSplitter(item, page_breaks, page_break_ids, self.max_flow_size, self.oeb)