From ce9c61580b98a0b94f40a27529dca5e32becebb2 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 6 Jul 2009 12:16:14 -0600 Subject: [PATCH] Initial (incomplete) implementation of periodical indexing for MOBI files using the recipe system --- src/calibre/ebooks/epub/output.py | 4 -- src/calibre/ebooks/metadata/__init__.py | 12 +++- src/calibre/ebooks/metadata/ncx.xml | 3 + src/calibre/ebooks/metadata/opf.xml | 1 + src/calibre/ebooks/metadata/opf2.py | 2 + src/calibre/ebooks/metadata/toc.py | 11 ++- src/calibre/ebooks/mobi/output.py | 70 +++++++++++++++++-- src/calibre/ebooks/oeb/base.py | 3 +- src/calibre/ebooks/oeb/reader.py | 4 +- src/calibre/ebooks/oeb/transforms/metadata.py | 7 +- .../ebooks/oeb/transforms/structure.py | 1 + .../ebooks/oeb/transforms/trimmanifest.py | 3 - src/calibre/gui2/convert/mobi_output.py | 2 +- src/calibre/gui2/convert/mobi_output.ui | 9 +-- src/calibre/web/feeds/news.py | 16 ++++- 15 files changed, 116 insertions(+), 32 deletions(-) diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py index b207ac4dcb..160676137e 100644 --- a/src/calibre/ebooks/epub/output.py +++ b/src/calibre/ebooks/epub/output.py @@ -225,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin): if 'titlepage' in self.oeb.guide.refs: self.oeb.guide.refs['titlepage'].href = item.href - - def condense_ncx(self, ncx_path): if not self.opts.pretty_print: tree = etree.parse(ncx_path) @@ -238,8 +236,6 @@ class EPUBOutput(OutputFormatPlugin): compressed = etree.tostring(tree.getroot(), encoding='utf-8') open(ncx_path, 'wb').write(compressed) - - def workaround_ade_quirks(self): ''' Perform various markup transforms to get the output to render correctly diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index 134a0c3c23..5f575eb2a9 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -218,7 +218,7 @@ class MetaInformation(object): 'isbn', 'tags', 'cover_data', 
'application_id', 'guide', 'manifest', 'spine', 'toc', 'cover', 'language', 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', - 'pubdate'): + 'pubdate', 'rights', 'publication_type'): if hasattr(mi, attr): setattr(ans, attr, getattr(mi, attr)) @@ -243,7 +243,8 @@ class MetaInformation(object): for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher', 'series', 'series_index', 'rating', 'isbn', 'language', 'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover', - 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate' + 'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', + 'rights', 'publication_type', ): setattr(self, x, getattr(mi, x, None)) @@ -262,7 +263,8 @@ class MetaInformation(object): 'publisher', 'series', 'series_index', 'rating', 'isbn', 'application_id', 'manifest', 'spine', 'toc', 'cover', 'language', 'guide', 'book_producer', - 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'): + 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights', + 'publication_type'): if hasattr(mi, attr): val = getattr(mi, attr) if val is not None: @@ -332,6 +334,8 @@ class MetaInformation(object): fmt('Timestamp', self.timestamp.isoformat(' ')) if self.pubdate is not None: fmt('Published', self.pubdate.isoformat(' ')) + if self.rights is not None: + fmt('Rights', unicode(self.rights)) if self.lccn: fmt('LCCN', unicode(self.lccn)) if self.lcc: @@ -362,6 +366,8 @@ class MetaInformation(object): ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))] if self.pubdate is not None: ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))] + if self.rights is not None: + ans += [(_('Rights'), unicode(self.rights))] for i, x in enumerate(ans): ans[i] = u'%s%s'%x return u'%s
'%u'\n'.join(ans) diff --git a/src/calibre/ebooks/metadata/ncx.xml b/src/calibre/ebooks/metadata/ncx.xml index f1e0b3dbb4..bcbcb432d4 100644 --- a/src/calibre/ebooks/metadata/ncx.xml +++ b/src/calibre/ebooks/metadata/ncx.xml @@ -7,6 +7,7 @@ import re xml:lang="en" xmlns="http://www.daisy.org/z3986/2005/ncx/" xmlns:py="http://genshi.edgewall.org/" + xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata" > @@ -23,6 +24,8 @@ import re ${'%*s'%(4*level,'')}${re.sub(r'\s+', ' ', np.text)} ${'%*s'%(4*level,'')} ${'%*s'%(4*level,'')} + ${'%*s'%(4*level,'')}${np.author} + ${'%*s'%(4*level,'')}${np.description} ${navpoint(np2, level+1)} ${'%*s'%(4*level,'')} diff --git a/src/calibre/ebooks/metadata/opf.xml b/src/calibre/ebooks/metadata/opf.xml index 7acf0f5c78..c79ac0f09f 100644 --- a/src/calibre/ebooks/metadata/opf.xml +++ b/src/calibre/ebooks/metadata/opf.xml @@ -19,6 +19,7 @@ + ${tag} diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index f801dbf65c..c147c2b748 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -440,10 +440,12 @@ class OPF(object): language = MetadataField('language') comments = MetadataField('description') category = MetadataField('category') + rights = MetadataField('rights') series = MetadataField('series', is_dc=False) series_index = MetadataField('series_index', is_dc=False, formatter=float, none_is=1) rating = MetadataField('rating', is_dc=False, formatter=int) pubdate = MetadataField('date', formatter=parser.parse) + publication_type = MetadataField('publication_type', is_dc=False) timestamp = MetadataField('timestamp', is_dc=False, formatter=parser.parse) diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py index 65e95f4c6b..8f9edde011 100644 --- a/src/calibre/ebooks/metadata/toc.py +++ b/src/calibre/ebooks/metadata/toc.py @@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup): class TOC(list): def __init__(self, href=None, 
fragment=None, text=None, parent=None, play_order=0, - base_path=os.getcwd(), type='unknown'): + base_path=os.getcwd(), type='unknown', author=None, + description=None): self.href = href self.fragment = fragment if not self.fragment: @@ -31,6 +32,8 @@ class TOC(list): self.base_path = base_path self.play_order = play_order self.type = type + self.author = author + self.description = description def __str__(self): lines = ['TOC: %s#%s'%(self.href, self.fragment)] @@ -59,11 +62,13 @@ class TOC(list): list.remove(self, entry) entry.parent = None - def add_item(self, href, fragment, text, play_order=None, type='unknown'): + def add_item(self, href, fragment, text, play_order=None, type='unknown', + author=None, description=None): if play_order is None: play_order = (self[-1].play_order if len(self) else self.play_order) + 1 self.append(TOC(href=href, fragment=fragment, text=text, parent=self, - base_path=self.base_path, play_order=play_order, type=type)) + base_path=self.base_path, play_order=play_order, + type=type, author=author, description=description)) return self[-1] def top_level_items(self): diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py index 4c272f75d1..2035df261a 100644 --- a/src/calibre/ebooks/mobi/output.py +++ b/src/calibre/ebooks/mobi/output.py @@ -6,6 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +from cStringIO import StringIO from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OptionRecommendation @@ -31,16 +32,76 @@ class MOBIOutput(OutputFormatPlugin): OptionRecommendation(name='toc_title', recommended_value=None, help=_('Title for any generated in-line table of contents.') ), - OptionRecommendation(name='mobi_periodical', - recommended_value=False, level=OptionRecommendation.LOW, - help=_('Generate a periodical rather than a book.') - ), OptionRecommendation(name='dont_compress', recommended_value=False, 
level=OptionRecommendation.LOW, help=_('Disable compression of the file contents.') ), ]) + def check_for_periodical(self): + if self.oeb.metadata.publication_type and \ + self.oeb.metadata.publication_type[0].startswith('periodical:'): + self.periodicalize_toc() + self.check_for_masthead() + self.opts.mobi_periodical = True + else: + self.opts.mobi_periodical = False + + def check_for_masthead(self): + found = False + for typ in self.oeb.guide: + if typ == 'masthead': + found = True + break + if not found: + self.oeb.debug('No masthead found, generating default one...') + from calibre.resources import server_resources + try: + from PIL import Image as PILImage + PILImage + except ImportError: + import Image as PILImage + + raw = StringIO(server_resources['calibre.png']) + im = PILImage.open(raw) + of = StringIO() + im.save(of, 'GIF') + raw = of.getvalue() + id, href = self.oeb.manifest.generate('masthead', 'masthead') + self.oeb.manifest.add(id, href, 'image/gif', data=raw) + self.oeb.guide.add('masthead', 'Masthead Image', href) + + + def periodicalize_toc(self): + from calibre.ebooks.oeb.base import TOC + toc = self.oeb.toc + if toc and toc[0].klass != 'periodical': + self.log('Converting TOC for MOBI periodical indexing...') + articles = {} + if toc.depth < 3: + sections = [TOC(klass='section')] + for x in toc: + sections[0].append(x) + else: + sections = list(toc) + for sec in sections: + articles[id(sec)] = [] + for a in list(sec): + articles[id(sec)].append(a) + sec.nodes.remove(a) + root = TOC(klass='periodical', title=self.oeb.metadata.title[0]) + for s in sections: + if articles[id(s)]: + for a in articles[id(s)]: + s.nodes.append(a) + root.nodes.append(s) + + for x in list(toc.nodes): + toc.nodes.remove(x) + + toc.nodes.append(root) + + def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \ @@ -60,6 +121,7 @@ class 
MOBIOutput(OutputFormatPlugin): rasterizer(oeb, opts) mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) mobimlizer(oeb, opts) + self.check_for_periodical() write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz') writer = MobiWriter(opts, imagemax=imagemax, compression=UNCOMPRESSED if opts.dont_compress else PALMDOC, diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 3b38536e4f..86c0ae1ed6 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -455,7 +455,8 @@ class Metadata(object): 'description', 'format', 'identifier', 'language', 'publisher', 'relation', 'rights', 'source', 'subject', 'title', 'type']) - CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp']) + CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp', + 'publication_type']) OPF_ATTRS = {'role': OPF('role'), 'file-as': OPF('file-as'), 'scheme': OPF('scheme'), 'event': OPF('event'), 'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'} diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 26a15c057f..6b246d0580 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -335,14 +335,14 @@ class OEBReader(object): po = int(child.get('playOrder', self.oeb.toc.next_play_order())) authorElement = xpath(child, - 'descendant::mbp:meta[@name = "author"]') + 'descendant::calibre:meta[@name = "author"]') if authorElement : author = authorElement[0].text else : author = None descriptionElement = xpath(child, - 'descendant::mbp:meta[@name = "description"]') + 'descendant::calibre:meta[@name = "description"]') if descriptionElement : description = descriptionElement[0].text else : diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index 74bf78c691..8f8a35d39b 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ 
-63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log): if mi.timestamp is not None: m.clear('timestamp') m.add('timestamp', mi.timestamp.isoformat()) + if mi.rights is not None: + m.clear('rights') + m.add('rights', mi.rights) + if mi.publication_type is not None: + m.clear('publication_type') + m.add('publication_type', mi.publication_type) if not m.timestamp: m.add('timestamp', datetime.utcnow().isoformat()) - class MergeMetadata(object): 'Merge in user metadata, including cover' diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index 21be375b1a..d5d17cdd07 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -13,6 +13,7 @@ from urlparse import urlparse from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML from calibre.ebooks import ConversionError + def XPath(x): try: return etree.XPath(x, namespaces=XPNSMAP) diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index 643ce47c4d..0baacfd1f9 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -64,8 +64,5 @@ class ManifestTrimmer(object): unchecked = new for item in oeb.manifest.values(): if item not in used: - if getattr(self.opts, 'mobi_periodical', False) and \ - item.href == 'images/mastheadImage.gif': - continue oeb.logger.info('Trimming %r from manifest' % item.href) oeb.manifest.remove(item) diff --git a/src/calibre/gui2/convert/mobi_output.py b/src/calibre/gui2/convert/mobi_output.py index 8fa27dcef1..f7d41957b2 100644 --- a/src/calibre/gui2/convert/mobi_output.py +++ b/src/calibre/gui2/convert/mobi_output.py @@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form): def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, 'mobi_output', ['prefer_author_sort', 'rescale_images', 'toc_title', - 'dont_compress', 
'mobi_periodical'] + 'dont_compress',] ) self.db, self.book_id = db, book_id self.initialize_options(get_option, get_help, db, book_id) diff --git a/src/calibre/gui2/convert/mobi_output.ui b/src/calibre/gui2/convert/mobi_output.ui index 8bd205e9dd..a1bad48fb0 100644 --- a/src/calibre/gui2/convert/mobi_output.ui +++ b/src/calibre/gui2/convert/mobi_output.ui @@ -41,7 +41,7 @@ - + Qt::Vertical @@ -61,13 +61,6 @@ - - - - Generate a periodical rather than a book - - - diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 9d2357ddfd..17bff315d4 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe): #: Delay between consecutive downloads in seconds delay = 0 + #: Publication type + #: Set to newspaper, magazine or blog + publication_type = 'unknown' + #: Number of simultaneous downloads. Set to 1 if the server is picky. #: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0 simultaneous_downloads = 5 @@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe): mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__]) mi.publisher = __appname__ mi.author_sort = __appname__ + mi.publication_type = 'periodical:'+self.publication_type opf_path = os.path.join(dir, 'index.opf') ncx_path = os.path.join(dir, 'index.ncx') opf = OPFCreator(dir, mi) @@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe): for j, a in enumerate(f): if getattr(a, 'downloaded', False): adir = 'feed_%d/article_%d/'%(num, j) + desc = a.text_summary + if not desc: + desc = None entries.append('%sindex.html'%adir) po = self.play_order_map.get(entries[-1], None) if po is None: self.play_order_counter += 1 po = self.play_order_counter parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'), - play_order=po) + play_order=po, description=desc) last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep)) for sp in a.sub_pages: prefix = 
os.path.commonprefix([opf_path, sp]) @@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe): if po is None: self.play_order_counter += 1 po = self.play_order_counter - feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po)) + desc = f.description + if not desc: + desc = None + feed_index(i, toc.add_item('feed_%d/index.html'%i, None, + f.title, play_order=po, description=desc)) else: entries.append('feed_%d/index.html'%0) feed_index(0, toc)