From 77d0747a6f40024b83d387458e25a61fc90fb454 Mon Sep 17 00:00:00 2001 From: GRiker Date: Thu, 28 Jan 2010 03:05:33 -0700 Subject: [PATCH 1/3] KG updates --- src/calibre/ebooks/oeb/transforms/guide.py | 5 ++ src/calibre/library/catalog.py | 3 +- src/calibre/web/feeds/news.py | 79 ++++++++++++++++++++-- 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py index 3941904fdc..069077554c 100644 --- a/src/calibre/ebooks/oeb/transforms/guide.py +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -32,6 +32,11 @@ class Clean(object): ref.type = 'cover' self.oeb.guide.refs['cover'] = ref + # Check for masthead in OPF, add to guide if present + for item in oeb.manifest.items: + if item.id == 'masthead-image': + self.oeb.guide.add('masthead', 'Masthead Image', item.href) + for x in list(self.oeb.guide): href = urldefrag(self.oeb.guide[x].href)[0] if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc', diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index 98ab27c3c7..529524371f 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -1773,7 +1773,8 @@ class EPUB_MOBI(CatalogPlugin): self.playOrder += 1 navLabelTag = Tag(soup, 'navLabel') textTag = Tag(soup, 'text') - textTag.insert(0, NavigableString("Titles beginning with %s" % (title_letters[i]))) + textTag.insert(0, NavigableString("Titles beginning with %s" % \ + (title_letters[i] if len(title_letters[i])>1 else "'" + title_letters[i] + "'"))) navLabelTag.insert(0, textTag) navPointByLetterTag.insert(0,navLabelTag) contentTag = Tag(soup, 'content') diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 9abb55852e..3328445fd8 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -111,9 +111,7 @@ class BasicNewsRecipe(Recipe): #: Specify an override encoding for sites that have an incorrect #: charset specification. The most common being specifying ``latin1`` and - #: using ``cp1252``. If None, try to detect the encoding. If it is a - #: callable, the callable is called with two arguments: The recipe object - #: and the source to be decoded. It must return the decoded source. + #: using ``cp1252``. If None, try to detect the encoding. encoding = None #: Normally we try to guess if a feed has full articles embedded in it @@ -294,6 +292,17 @@ class BasicNewsRecipe(Recipe): ''' return getattr(self, 'cover_url', None) + def get_masthead_url(self): + ''' + Return a :term:`URL` to the masthead image for this issue or `None`. + By default it returns the value of the member `self.masthead_url` which + is normally `None`. If you want your recipe to download a masthead for the e-book + override this method in your subclass, or set the member variable `self.masthead_url` + before this method is called. + Masthead images are used in Kindle MOBI files. + ''' + return getattr(self, 'masthead_url', None) + def get_feeds(self): ''' Return a list of :term:`RSS` feeds to fetch for this profile. Each element of the list @@ -423,10 +432,7 @@ class BasicNewsRecipe(Recipe): if raw: return _raw if not isinstance(_raw, unicode) and self.encoding: - if callable(self.encoding): - _raw = self.encoding(_raw) - else: - _raw = _raw.decode(self.encoding, 'replace') + _raw = _raw.decode(self.encoding, 'replace') massage = list(BeautifulSoup.MARKUP_MASSAGE) massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding))) return BeautifulSoup(_raw, markupMassage=massage) @@ -743,6 +749,9 @@ class BasicNewsRecipe(Recipe): self.report_progress(0, _('Trying to download cover...')) self.download_cover() + self.report_progress(0, _('Trying to download masthead...')) + self.download_masthead() + if self.test: feeds = feeds[:2] self.has_single_feed = len(feeds) == 1 @@ -859,7 +868,50 @@ class BasicNewsRecipe(Recipe): self.log.exception('Failed to download cover') self.cover_path = None + def convert_image(self, name): + image_ext = name.rpartition('.')[2].lower() + if image_ext in ['jpg','jpeg']: + return name + import calibre.utils.PythonMagickWand as p + img = p.NewMagickWand() + if img < 0: + raise RuntimeError('Cannot create wand.') + if not p.MagickReadImage(img, name): + self.log.warn('Failed to read image:', name) + name = name.replace('.%s' % image_ext, '.jpg') + p.MagickWriteImage(img, name) + return name + def _download_masthead(self): + self.masthead_path = None + try: + mu = self.get_masthead_url() + except Exception, err: + mu = None + self.log.error(_('Could not download masthead: %s')%str(err)) + self.log.debug(traceback.format_exc()) + if mu is not None: + ext = mu.rpartition('.')[-1] + if '?' in ext: + ext = '' + ext = ext.lower() if ext else 'jpg' + mpath = os.path.join(self.output_dir, 'mastheadImage.'+ext) + if os.access(mu, os.R_OK): + with open(mpath, 'wb') as mfile: + mfile.write(open(mu, 'rb').read()) + else: + self.report_progress(1, _('Downloading masthead from %s')%mu) + with nested(open(mpath, 'wb'), closing(self.browser.open(mu))) as (mfile, r): + mfile.write(r.read()) + self.masthead_path = self.convert_image(mpath) + + + def download_masthead(self): + try: + self._download_masthead() + except: + self.log.exception('Failed to download masthead') + self.masthead_path = None def default_cover(self, cover_file): ''' @@ -944,6 +996,8 @@ class BasicNewsRecipe(Recipe): manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))] manifest.append(os.path.join(dir, 'index.html')) manifest.append(os.path.join(dir, 'index.ncx')) + + # Get cover cpath = getattr(self, 'cover_path', None) if cpath is None: pf = open(os.path.join(dir, 'cover.jpg'), 'wb') @@ -952,10 +1006,21 @@ class BasicNewsRecipe(Recipe): if cpath is not None and os.access(cpath, os.R_OK): opf.cover = cpath manifest.append(cpath) + + # Get masthead + mpath = getattr(self, 'masthead_path', None) + print "\ncreate_opf(): masthead: %s\n" % mpath + if mpath is not None and os.access(mpath, os.R_OK): + manifest.append(mpath) + opf.manifest = mpath + opf.create_manifest_from_files_in(manifest) for mani in opf.manifest: if mani.path.endswith('.ncx'): mani.id = 'ncx' + if mani.path.endswith('mastheadImage.jpg'): + mani.id = 'masthead-image' + entries = ['index.html'] toc = TOC(base_path=dir) From b9521c819d0a45c5d47ac7eb18b4b636f16e8dc1 Mon Sep 17 00:00:00 2001 From: GRiker Date: Thu, 28 Jan 2010 06:47:41 -0700 Subject: [PATCH 2/3] GwR revisions for mastheadImage, Catalog tweaks --- src/calibre/gui2/catalog/catalog_epub_mobi.ui | 81 +++++++++++++------ src/calibre/gui2/dialogs/catalog.ui | 2 +- src/calibre/web/feeds/news.py | 3 + 3 files changed, 59 insertions(+), 27 deletions(-) diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.ui b/src/calibre/gui2/catalog/catalog_epub_mobi.ui index 858aec429f..044ecdaaec 100644 --- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui +++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui @@ -14,19 +14,6 @@ Form - - - - Tags to exclude as genres (regex): - - - Qt::LogText - - - true - - - @@ -37,7 +24,7 @@ - + @@ -51,7 +38,7 @@ - + @@ -65,18 +52,67 @@ - + - + + + + Sort numbers as text + + + + - + - + + + + Regex pattern describing tags to exclude as genres: + + + Qt::LogText + + + true + + + + + + + + 14 + 75 + true + + + + Special marker tags for catalog generation + + + Qt::AlignCenter + + + + + + + Regex tips: +- The default regex of '\[[\w]*\]' ignores tags of the form '[tag]', e.g., '[Amazon Freebie]' +- A regex of '.' ignores all tags, generating no genre categories in the catalog + + + true + + + + Qt::Vertical @@ -89,13 +125,6 @@ - - - - Sort numbers as text - - - diff --git a/src/calibre/gui2/dialogs/catalog.ui b/src/calibre/gui2/dialogs/catalog.ui index c18e08ef65..3d62f36e85 100644 --- a/src/calibre/gui2/dialogs/catalog.ui +++ b/src/calibre/gui2/dialogs/catalog.ui @@ -107,7 +107,7 @@ 12 12 - 205 + 301 17 diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 3328445fd8..00869409ea 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -872,6 +872,8 @@ class BasicNewsRecipe(Recipe): image_ext = name.rpartition('.')[2].lower() if image_ext in ['jpg','jpeg']: return name + if image_ext not in ['gif']: + raise RuntimeError("web.feeds.news:BasicNewsRecipe.convert_image(): '%s' is not a supported mastheadImage format" % image_ext) import calibre.utils.PythonMagickWand as p img = p.NewMagickWand() if img < 0: @@ -880,6 +882,7 @@ class BasicNewsRecipe(Recipe): self.log.warn('Failed to read image:', name) name = name.replace('.%s' % image_ext, '.jpg') p.MagickWriteImage(img, name) + p.DestroyMagickWand(img) return name def _download_masthead(self): From 192e6a2f5dafdf670c1cae491ee8d4f974744e7a Mon Sep 17 00:00:00 2001 From: GRiker Date: Thu, 28 Jan 2010 12:54:49 -0700 Subject: [PATCH 3/3] GR changes for mastheadImage --- src/calibre/ebooks/mobi/output.py | 5 ++++- src/calibre/ebooks/oeb/transforms/guide.py | 6 +----- src/calibre/web/feeds/news.py | 8 ++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py index 7e4643dac1..e3a1da34cc 100644 --- a/src/calibre/ebooks/mobi/output.py +++ b/src/calibre/ebooks/mobi/output.py @@ -50,7 +50,7 @@ class MOBIOutput(OutputFormatPlugin): def check_for_masthead(self): found = 'masthead' in self.oeb.guide if not found: - self.oeb.log.debug('No masthead found, generating default one...') + self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...') try: from PIL import Image as PILImage PILImage @@ -65,6 +65,9 @@ class MOBIOutput(OutputFormatPlugin): id, href = self.oeb.manifest.generate('masthead', 'masthead') self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.guide.add('masthead', 'Masthead Image', href) + else: + self.oeb.log.debug('Using mastheadImage supplied in manifest...') + def dump_toc(self, toc) : self.log( "\n >>> TOC contents <<<") diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py index 069077554c..e911fe9cf7 100644 --- a/src/calibre/ebooks/oeb/transforms/guide.py +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -32,13 +32,9 @@ class Clean(object): ref.type = 'cover' self.oeb.guide.refs['cover'] = ref - # Check for masthead in OPF, add to guide if present - for item in oeb.manifest.items: - if item.id == 'masthead-image': - self.oeb.guide.add('masthead', 'Masthead Image', item.href) - for x in list(self.oeb.guide): href = urldefrag(self.oeb.guide[x].href)[0] + print "ebooks.oeb.transforms.guide:Clean(): checking x.lower(): %s" % x.lower() if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc', 'title-page', 'copyright-page', 'start'): self.oeb.guide.remove(x) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 00869409ea..f8be8462c6 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -994,6 +994,14 @@ class BasicNewsRecipe(Recipe): mi.pubdate = datetime.now() opf_path = os.path.join(dir, 'index.opf') ncx_path = os.path.join(dir, 'index.ncx') + + # Add mastheadImage entry to section + from calibre.ebooks.metadata.opf2 import Guide + mi.guide = Guide() + ref = Guide.Reference('mastheadImage.jpg', os.getcwdu()) + ref.type = 'masthead' + ref.title = 'Masthead Image' + mi.guide.append(ref) opf = OPFCreator(dir, mi) manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]