From ce9c61580b98a0b94f40a27529dca5e32becebb2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 6 Jul 2009 12:16:14 -0600
Subject: [PATCH] Initial (incomplete) implementation of periodical indexing
 for MOBI files using the recipe system

---
 src/calibre/ebooks/epub/output.py             |  4 --
 src/calibre/ebooks/metadata/__init__.py       | 12 +++-
 src/calibre/ebooks/metadata/ncx.xml           |  3 +
 src/calibre/ebooks/metadata/opf.xml           |  1 +
 src/calibre/ebooks/metadata/opf2.py           |  2 +
 src/calibre/ebooks/metadata/toc.py            | 11 ++-
 src/calibre/ebooks/mobi/output.py             | 70 +++++++++++++++++--
 src/calibre/ebooks/oeb/base.py                |  3 +-
 src/calibre/ebooks/oeb/reader.py              |  4 +-
 src/calibre/ebooks/oeb/transforms/metadata.py |  7 +-
 .../ebooks/oeb/transforms/structure.py        |  1 +
 .../ebooks/oeb/transforms/trimmanifest.py     |  3 -
 src/calibre/gui2/convert/mobi_output.py       |  2 +-
 src/calibre/gui2/convert/mobi_output.ui       |  9 +--
 src/calibre/web/feeds/news.py                 | 16 ++++-
 15 files changed, 116 insertions(+), 32 deletions(-)

diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index b207ac4dcb..160676137e 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -225,8 +225,6 @@ class EPUBOutput(OutputFormatPlugin):
             if 'titlepage' in self.oeb.guide.refs:
                 self.oeb.guide.refs['titlepage'].href = item.href
 
-
-
     def condense_ncx(self, ncx_path):
         if not self.opts.pretty_print:
             tree = etree.parse(ncx_path)
@@ -238,8 +236,6 @@ class EPUBOutput(OutputFormatPlugin):
             compressed = etree.tostring(tree.getroot(), encoding='utf-8')
             open(ncx_path, 'wb').write(compressed)
 
-
-
     def workaround_ade_quirks(self):
         '''
         Perform various markup transforms to get the output to render correctly
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index 134a0c3c23..5f575eb2a9 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -218,7 +218,7 @@ class MetaInformation(object):
                      'isbn', 'tags', 'cover_data', 'application_id', 'guide',
                      'manifest', 'spine', 'toc', 'cover', 'language',
                      'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc',
-                     'pubdate'):
+                     'pubdate', 'rights', 'publication_type'):
             if hasattr(mi, attr):
                 setattr(ans, attr, getattr(mi, attr))
 
@@ -243,7 +243,8 @@ class MetaInformation(object):
         for x in ('author_sort', 'title_sort', 'comments', 'category', 'publisher',
                   'series', 'series_index', 'rating', 'isbn', 'language',
                   'application_id', 'manifest', 'toc', 'spine', 'guide', 'cover',
-                  'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'
+                  'book_producer', 'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate',
+                  'rights', 'publication_type',
                   ):
             setattr(self, x, getattr(mi, x, None))
 
@@ -262,7 +263,8 @@ class MetaInformation(object):
                      'publisher', 'series', 'series_index', 'rating',
                      'isbn', 'application_id', 'manifest', 'spine', 'toc',
                      'cover', 'language', 'guide', 'book_producer',
-                     'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate'):
+                     'timestamp', 'lccn', 'lcc', 'ddc', 'pubdate', 'rights',
+                     'publication_type'):
             if hasattr(mi, attr):
                 val = getattr(mi, attr)
                 if val is not None:
@@ -332,6 +334,8 @@ class MetaInformation(object):
             fmt('Timestamp', self.timestamp.isoformat(' '))
         if self.pubdate is not None:
             fmt('Published', self.pubdate.isoformat(' '))
+        if self.rights is not None:
+            fmt('Rights', unicode(self.rights))
         if self.lccn:
             fmt('LCCN', unicode(self.lccn))
         if self.lcc:
@@ -362,6 +366,8 @@ class MetaInformation(object):
             ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
         if self.pubdate is not None:
             ans += [(_('Published'), unicode(self.pubdate.isoformat(' ')))]
+        if self.rights is not None:
+            ans += [(_('Rights'), unicode(self.rights))]  # rights is text, not a date
         for i, x in enumerate(ans):
             ans[i] = u'<tr><td><b>%s</b></td><td>%s</td></tr>'%x
         return u'<table>%s</table>'%u'\n'.join(ans)
diff --git a/src/calibre/ebooks/metadata/ncx.xml b/src/calibre/ebooks/metadata/ncx.xml
index f1e0b3dbb4..bcbcb432d4 100644
--- a/src/calibre/ebooks/metadata/ncx.xml
+++ b/src/calibre/ebooks/metadata/ncx.xml
@@ -7,6 +7,7 @@ import re
      xml:lang="en" 
      xmlns="http://www.daisy.org/z3986/2005/ncx/"
      xmlns:py="http://genshi.edgewall.org/"
+     xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata"
 >
     <head>
         <meta name="dtb:uid" content="${uid}"/>
@@ -23,6 +24,8 @@ import re
                 ${'%*s'%(4*level,'')}<text>${re.sub(r'\s+', ' ', np.text)}</text>
             ${'%*s'%(4*level,'')}</navLabel>
             ${'%*s'%(4*level,'')}<content src="${unicode(np.href)+(('#' + unicode(np.fragment)) if np.fragment else '')}" />
+            ${'%*s'%(4*level,'')}<calibre:meta py:if="np.author" name="author">${np.author}</calibre:meta>
+            ${'%*s'%(4*level,'')}<calibre:meta py:if="np.description" name="description">${np.description}</calibre:meta>
             <py:for each="np2 in np">${navpoint(np2, level+1)}</py:for>
         ${'%*s'%(4*level,'')}</navPoint>
     </py:def>
diff --git a/src/calibre/ebooks/metadata/opf.xml b/src/calibre/ebooks/metadata/opf.xml
index 7acf0f5c78..c79ac0f09f 100644
--- a/src/calibre/ebooks/metadata/opf.xml
+++ b/src/calibre/ebooks/metadata/opf.xml
@@ -19,6 +19,7 @@
         <meta py:if="mi.series_index is not None" name="calibre:series_index" content="${mi.format_series_index()}"/>
         <meta py:if="mi.rating is not None" name="calibre:rating" content="${mi.rating}"/>
         <meta py:if="mi.timestamp is not None" name="calibre:timestamp" content="${mi.timestamp.isoformat()}"/>
+        <meta py:if="mi.publication_type is not None" name="calibre:publication_type" content="${mi.publication_type}" />
         <py:for each="tag in mi.tags">
         <dc:subject py:if="mi.tags is not None">${tag}</dc:subject>
         </py:for>
diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index f801dbf65c..c147c2b748 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -440,10 +440,12 @@ class OPF(object):
     language        = MetadataField('language')
     comments        = MetadataField('description')
     category        = MetadataField('category')
+    rights          = MetadataField('rights')
     series          = MetadataField('series', is_dc=False)
     series_index    = MetadataField('series_index', is_dc=False, formatter=float, none_is=1)
     rating          = MetadataField('rating', is_dc=False, formatter=int)
     pubdate         = MetadataField('date', formatter=parser.parse)
+    publication_type = MetadataField('publication_type', is_dc=False)
     timestamp       = MetadataField('timestamp', is_dc=False, formatter=parser.parse)
 
 
diff --git a/src/calibre/ebooks/metadata/toc.py b/src/calibre/ebooks/metadata/toc.py
index 65e95f4c6b..8f9edde011 100644
--- a/src/calibre/ebooks/metadata/toc.py
+++ b/src/calibre/ebooks/metadata/toc.py
@@ -21,7 +21,8 @@ class NCXSoup(BeautifulStoneSoup):
 class TOC(list):
 
     def __init__(self, href=None, fragment=None, text=None, parent=None, play_order=0,
-                 base_path=os.getcwd(), type='unknown'):
+                 base_path=os.getcwd(), type='unknown', author=None,
+                 description=None):
         self.href = href
         self.fragment = fragment
         if not self.fragment:
@@ -31,6 +32,8 @@ class TOC(list):
         self.base_path = base_path
         self.play_order = play_order
         self.type = type
+        self.author = author
+        self.description = description
 
     def __str__(self):
         lines = ['TOC: %s#%s'%(self.href, self.fragment)]
@@ -59,11 +62,13 @@ class TOC(list):
         list.remove(self, entry)
         entry.parent = None
 
-    def add_item(self, href, fragment, text, play_order=None, type='unknown'):
+    def add_item(self, href, fragment, text, play_order=None, type='unknown',
+            author=None, description=None):
         if play_order is None:
             play_order = (self[-1].play_order if len(self) else self.play_order) + 1
         self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
-                        base_path=self.base_path, play_order=play_order, type=type))
+                        base_path=self.base_path, play_order=play_order,
+                        type=type, author=author, description=description))
         return self[-1]
 
     def top_level_items(self):
diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py
index 4c272f75d1..2035df261a 100644
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@@ -6,6 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+from cStringIO import StringIO
 
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
@@ -31,16 +32,76 @@ class MOBIOutput(OutputFormatPlugin):
         OptionRecommendation(name='toc_title', recommended_value=None,
             help=_('Title for any generated in-line table of contents.')
         ),
-        OptionRecommendation(name='mobi_periodical',
-            recommended_value=False, level=OptionRecommendation.LOW,
-            help=_('Generate a periodical rather than a book.')
-        ),
         OptionRecommendation(name='dont_compress',
             recommended_value=False, level=OptionRecommendation.LOW,
             help=_('Disable compression of the file contents.')
         ),
     ])
 
+    def check_for_periodical(self):
+        '''Set opts.mobi_periodical from the publication_type metadata.'''
+        ptype = self.oeb.metadata.publication_type
+        periodical = bool(ptype and ptype[0].startswith('periodical:'))
+        if periodical:
+            self.periodicalize_toc()
+            self.check_for_masthead()
+        self.opts.mobi_periodical = periodical
+
+    def check_for_masthead(self):
+        '''
+        Ensure the guide has a ``masthead`` entry, generating a default GIF
+        masthead from the bundled ``calibre.png`` when none is found.
+        '''
+        # Test the loop variable; the builtin ``type`` never equals a string.
+        if not any(typ == 'masthead' for typ in self.oeb.guide):
+            self.oeb.debug('No masthead found, generating default one...')
+            from calibre.resources import server_resources
+            try:
+                from PIL import Image as PILImage
+                PILImage
+            except ImportError:
+                import Image as PILImage
+
+            raw = StringIO(server_resources['calibre.png'])
+            im = PILImage.open(raw)
+            of = StringIO()
+            im.save(of, 'GIF')
+            raw = of.getvalue()
+            id, href = self.oeb.manifest.generate('masthead', 'masthead')
+            self.oeb.manifest.add(id, href, 'image/gif', data=raw)
+            self.oeb.guide.add('masthead', 'Masthead Image', href)
+
+
+    def periodicalize_toc(self):
+        '''Nest the TOC under a single periodical root for MOBI indexing.'''
+        from calibre.ebooks.oeb.base import TOC
+        toc = self.oeb.toc
+        if toc and toc[0].klass != 'periodical':
+            self.log('Converting TOC for MOBI periodical indexing...')
+            articles = {}
+            if toc.depth < 3:
+                sections = [TOC(klass='section')]
+                for x in toc:
+                    sections[0].append(x)
+            else:
+                sections = list(toc)
+            for sec in sections:
+                articles[id(sec)] = list(sec)
+                for a in articles[id(sec)]:
+                    sec.nodes.remove(a)
+            root = TOC(klass='periodical', title=self.oeb.metadata.title[0])
+            for s in sections:
+                if articles[id(s)]:
+                    for a in articles[id(s)]:
+                        s.nodes.append(a)
+                    # Attach every non-empty section, not only the last one
+                    root.nodes.append(s)
+            # Replace the old top-level entries with the single periodical root
+            for x in list(toc.nodes):
+                toc.nodes.remove(x)
+            toc.nodes.append(root)
+
+
     def convert(self, oeb, output_path, input_plugin, opts, log):
         self.log, self.opts, self.oeb = log, opts, oeb
         from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
@@ -60,6 +121,7 @@ class MOBIOutput(OutputFormatPlugin):
         rasterizer(oeb, opts)
         mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
         mobimlizer(oeb, opts)
+        self.check_for_periodical()
         write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
         writer = MobiWriter(opts, imagemax=imagemax,
                 compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 3b38536e4f..86c0ae1ed6 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -455,7 +455,8 @@ class Metadata(object):
                          'description', 'format', 'identifier', 'language',
                          'publisher', 'relation', 'rights', 'source',
                          'subject', 'title', 'type'])
-    CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp'])
+    CALIBRE_TERMS = set(['series', 'series_index', 'rating', 'timestamp',
+                         'publication_type'])
     OPF_ATTRS     = {'role': OPF('role'), 'file-as': OPF('file-as'),
                      'scheme': OPF('scheme'), 'event': OPF('event'),
                      'type': XSI('type'), 'lang': XML('lang'), 'id': 'id'}
diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index 26a15c057f..6b246d0580 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -335,14 +335,14 @@ class OEBReader(object):
             po = int(child.get('playOrder', self.oeb.toc.next_play_order()))
 
             authorElement = xpath(child,
-                    'descendant::mbp:meta[@name = "author"]')
+                    'descendant::calibre:meta[@name = "author"]')
             if authorElement :
                 author = authorElement[0].text
             else :
                 author = None
 
             descriptionElement = xpath(child,
-                    'descendant::mbp:meta[@name = "description"]')
+                    'descendant::calibre:meta[@name = "description"]')
             if descriptionElement :
                 description = descriptionElement[0].text
             else :
diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py
index 74bf78c691..8f8a35d39b 100644
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@@ -63,11 +63,16 @@ def meta_info_to_oeb_metadata(mi, m, log):
     if mi.timestamp is not None:
         m.clear('timestamp')
         m.add('timestamp', mi.timestamp.isoformat())
+    if mi.rights is not None:
+        m.clear('rights')
+        m.add('rights', mi.rights)
+    if mi.publication_type is not None:
+        m.clear('publication_type')
+        m.add('publication_type', mi.publication_type)
     if not m.timestamp:
         m.add('timestamp', datetime.utcnow().isoformat())
 
 
-
 class MergeMetadata(object):
     'Merge in user metadata, including cover'
 
diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py
index 21be375b1a..d5d17cdd07 100644
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -13,6 +13,7 @@ from urlparse import urlparse
 
 from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
 from calibre.ebooks import ConversionError
+
 def XPath(x):
     try:
         return etree.XPath(x, namespaces=XPNSMAP)
diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
index 643ce47c4d..0baacfd1f9 100644
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@@ -64,8 +64,5 @@ class ManifestTrimmer(object):
             unchecked = new
         for item in oeb.manifest.values():
             if item not in used:
-                if getattr(self.opts, 'mobi_periodical', False) and \
-                        item.href == 'images/mastheadImage.gif':
-                    continue
                 oeb.logger.info('Trimming %r from manifest' % item.href)
                 oeb.manifest.remove(item)
diff --git a/src/calibre/gui2/convert/mobi_output.py b/src/calibre/gui2/convert/mobi_output.py
index 8fa27dcef1..f7d41957b2 100644
--- a/src/calibre/gui2/convert/mobi_output.py
+++ b/src/calibre/gui2/convert/mobi_output.py
@@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
     def __init__(self, parent, get_option, get_help, db=None, book_id=None):
         Widget.__init__(self, parent, 'mobi_output',
                 ['prefer_author_sort', 'rescale_images', 'toc_title',
-                'dont_compress', 'mobi_periodical']
+                'dont_compress',]
                 )
         self.db, self.book_id = db, book_id
         self.initialize_options(get_option, get_help, db, book_id)
diff --git a/src/calibre/gui2/convert/mobi_output.ui b/src/calibre/gui2/convert/mobi_output.ui
index 8bd205e9dd..a1bad48fb0 100644
--- a/src/calibre/gui2/convert/mobi_output.ui
+++ b/src/calibre/gui2/convert/mobi_output.ui
@@ -41,7 +41,7 @@
      </property>
     </widget>
    </item>
-   <item row="6" column="0">
+   <item row="5" column="0">
     <spacer name="verticalSpacer">
      <property name="orientation">
       <enum>Qt::Vertical</enum>
@@ -61,13 +61,6 @@
      </property>
     </widget>
    </item>
-   <item row="5" column="0">
-    <widget class="QCheckBox" name="opt_mobi_periodical">
-     <property name="text">
-      <string>Generate a periodical rather than a book</string>
-     </property>
-    </widget>
-   </item>
    <item row="0" column="0">
     <widget class="QCheckBox" name="opt_no_inline_toc">
      <property name="text">
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index 9d2357ddfd..17bff315d4 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -64,6 +64,10 @@ class BasicNewsRecipe(Recipe):
     #: Delay between consecutive downloads in seconds
     delay                  = 0
 
+    #: Publication type
+    #: Set to newspaper, magazine or blog
+    publication_type = 'unknown'
+
     #: Number of simultaneous downloads. Set to 1 if the server is picky.
     #: Automatically reduced to 1 if :attr:`BasicNewsRecipe.delay` > 0
     simultaneous_downloads = 5
@@ -848,6 +852,7 @@ class BasicNewsRecipe(Recipe):
         mi = MetaInformation(self.title + strftime(self.timefmt), [__appname__])
         mi.publisher = __appname__
         mi.author_sort = __appname__
+        mi.publication_type = 'periodical:'+self.publication_type
         opf_path = os.path.join(dir, 'index.opf')
         ncx_path = os.path.join(dir, 'index.ncx')
         opf = OPFCreator(dir, mi)
@@ -878,13 +883,16 @@ class BasicNewsRecipe(Recipe):
             for j, a in enumerate(f):
                 if getattr(a, 'downloaded', False):
                     adir = 'feed_%d/article_%d/'%(num, j)
+                    desc = a.text_summary
+                    if not desc:
+                        desc = None
                     entries.append('%sindex.html'%adir)
                     po = self.play_order_map.get(entries[-1], None)
                     if po is None:
                         self.play_order_counter += 1
                         po = self.play_order_counter
                     parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
-                                    play_order=po)
+                                    play_order=po, description=desc)
                     last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                     for sp in a.sub_pages:
                         prefix = os.path.commonprefix([opf_path, sp])
@@ -915,7 +923,11 @@ class BasicNewsRecipe(Recipe):
                 if po is None:
                     self.play_order_counter += 1
                     po = self.play_order_counter
-                feed_index(i, toc.add_item('feed_%d/index.html'%i, None, f.title, play_order=po))
+                desc = f.description
+                if not desc:
+                    desc = None
+                feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
+                    f.title, play_order=po, description=desc))
         else:
             entries.append('feed_%d/index.html'%0)
             feed_index(0, toc)