Support for the SONY periodical format. News downloaded with calibre will now appear in the Periodicals section of your SONY reader, with the special periodical navigation enabled.

2025-07-09 03:04:10 -04:00 · 2010-10-18 20:02:32 -06:00 · 2010-10-18 20:02:32 -06:00 · 1ebee86c83
commit 1ebee86c83
parent 757b8fa4c0
7 changed files with 210 additions and 12 deletions
--- a/resources/recipes/atlantic.recipe
+++ b/resources/recipes/atlantic.recipe
@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
        for poem in soup.findAll('div', attrs={'class':'poem'}):
            title = self.tag_to_string(poem.find('h4'))
            desc  = self.tag_to_string(poem.find(attrs={'class':'author'}))
-            url   = 'http://www.theatlantic.com'+poem.find('a')['href']
+            url   = poem.find('a')['href']
            if url.startswith('/'):
                url = 'http://www.theatlantic.com' + url
            self.log('\tFound article:', title, 'at', url)
            self.log('\t\t', desc)
            poems.append({'title':title, 'url':url, 'description':desc,
@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
        if div is not None:
            self.log('Found section: Advice')
            title = self.tag_to_string(div.find('h4'))
-            url = 'http://www.theatlantic.com'+div.find('a')['href']
+            url = div.find('a')['href']
            if url.startswith('/'):
                url = 'http://www.theatlantic.com' + url
            desc = self.tag_to_string(div.find('p'))
            self.log('\tFound article:', title, 'at', url)
            self.log('\t\t', desc)
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
        '''
        raise NotImplementedError
    @property
    def is_periodical(self):
        '''
        Truthy when the book being converted is a periodical, i.e. when the
        first ``publication_type`` metadata entry starts with
        ``'periodical:'``. When there is no publication type, the falsy
        metadata value itself is returned.
        '''
        pub_type = self.oeb.metadata.publication_type
        if not pub_type:
            return pub_type
        first = unicode(pub_type[0])
        return first.startswith('periodical:')
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -15,22 +15,30 @@ def rules(stylesheets):
                if r.type == r.STYLE_RULE:
                    yield r
def initialize_container(path_to_container, opf_name='metadata.opf',
        extra_entries=None):
    '''
    Create an empty EPUB document, with a default skeleton.

    The skeleton consists of the uncompressed ``mimetype`` entry, the
    ``META-INF/`` directory and ``META-INF/container.xml``.

    :param path_to_container: Passed to ``ZipFile`` as the archive to create
                              (opened in ``'w'`` mode).
    :param opf_name: Value of ``full-path`` for the OPF rootfile listed in
                     ``container.xml``.
    :param extra_entries: Optional iterable of ``(path, mimetype, data)``
                          triples. Each one is listed as an additional
                          ``<rootfile>`` in ``container.xml`` and ``data`` is
                          written into the archive at ``path``.
    :return: The still-open ``ZipFile``; the caller adds the remaining content.
    '''
    from xml.sax.saxutils import escape

    def attr(value):
        # The template quotes attributes with ", so escape it along with &, < and >
        return escape(value, {'"': '&quot;'})

    # Materialize once: the entries are walked twice (container.xml, then data)
    extra_entries = list(extra_entries or ())

    rootfiles = u''.join(
        u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
            attr(path), attr(mimetype))
        for path, mimetype, data in extra_entries)

    CONTAINER = u'''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
      {extra_entries}
   </rootfiles>
</container>
    '''.format(attr(opf_name), extra_entries=rootfiles).encode('utf-8')

    zf = ZipFile(path_to_container, 'w')
    # The mimetype entry is written first and stored without compression
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', '', 0o700)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, mimetype, data in extra_entries:
        zf.writestr(path, data)
    return zf
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
    recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
    def workaround_webkit_quirks(self): # {{{
        from calibre.ebooks.oeb.base import XPath
        for x in self.oeb.spine:
@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin):
        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            metadata_xml = None
            extra_entries = []
            if self.is_periodical:
                from calibre.ebooks.epub.periodical import sony_metadata
                metadata_xml, atom_xml = sony_metadata(oeb)
                extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin):
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
            from calibre.ebooks.epub import initialize_container
-            epub = initialize_container(output_path, os.path.basename(opf))
+            epub = initialize_container(output_path, os.path.basename(opf),
                    extra_entries=extra_entries)
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', encryption)
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml',
                        metadata_xml.encode('utf-8'))
            if opts.extract_to is not None:
                if os.path.exists(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
--- a/src/calibre/ebooks/epub/periodical.py
+++ b/src/calibre/ebooks/epub/periodical.py
@ -0,0 +1,170 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from uuid import uuid4
 from calibre.constants import __appname__, __version__
 from calibre import strftime, prepare_string_for_xml as xml
 # RDF metadata template for a SONY periodical. sony_metadata() fills in the
 # {title}, {publisher}, {short_title}, {issue_date} and {language}
 # placeholders with str.format(), passing XML-escaped values.
 SONY_METADATA = u'''\
 <?xml version="1.0" encoding="utf-8"?>
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 		 xmlns:dcterms="http://purl.org/dc/terms/"
 		 xmlns:dc="http://purl.org/dc/elements/1.1/"
 		 xmlns:prs="http://xmlns.sony.net/e-book/prs/">
 	<rdf:Description rdf:about="">
 		<dc:title>{title}</dc:title>
 		<dc:publisher>{publisher}</dc:publisher>
 		<dcterms:alternative>{short_title}</dcterms:alternative>
 		<dcterms:issued>{issue_date}</dcterms:issued>
 		<dc:language>{language}</dc:language>
 		<dcterms:conformsTo rdf:resource="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0"/>
 		<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/newspaper"/>
 		<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/periodical"/>
 	</rdf:Description>
 </rdf:RDF>
 '''
 # Outer Atom feed template. {entries} receives the already-rendered section
 # and article <entry> elements; {short_title}, {updated} and {id} are
 # filled in by sony_metadata() with str.format().
 SONY_ATOM = u'''\
 <?xml version="1.0" encoding="utf-8" ?>
 <feed xmlns="http://www.w3.org/2005/Atom"
    xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:dcterms="http://purl.org/dc/terms/"
    xmlns:prs="http://xmlns.sony.net/e-book/prs/"
    xmlns:media="http://video.search.yahoo.com/mrss"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
 <title>{short_title}</title>
 <updated>{updated}</updated>
 <id>{id}</id>
 {entries}
 </feed>
 '''
 # Atom <entry> template for one section. {title} is used both as the
 # visible title and as the entry's rdf:ID, which is why sony_metadata()
 # makes section titles unique before formatting. Other placeholders:
 # {href}, {id}, {updated}, {desc}, {short_title}.
 SONY_ATOM_SECTION = u'''\
 <entry rdf:ID="{title}">
  <title>{title}</title>
  <link href="{href}"/>
  <id>{id}</id>
  <updated>{updated}</updated>
  <summary>{desc}</summary>
  <category term="{short_title}/{title}"
      scheme="http://xmlns.sony.net/e-book/terms/" label="{title}"/>
  <dc:type xsi:type="prs:datatype">newspaper/section</dc:type>
  <dcterms:isReferencedBy rdf:resource=""/>
 </entry>
 '''
 # Atom <entry> template for one article. {section_title} ties the article
 # to its section through the "#{section_title}" reference, which matches
 # the rdf:ID written by SONY_ATOM_SECTION. Other placeholders: {title},
 # {author}, {href}, {id}, {updated}, {desc}, {short_title}, {word_count}.
 SONY_ATOM_ENTRY = u'''\
 <entry>
  <title>{title}</title>
  <author><name>{author}</name></author>
  <link href="{href}"/>
  <id>{id}</id>
  <updated>{updated}</updated>
  <summary>{desc}</summary>
  <category term="{short_title}/{section_title}"
      scheme="http://xmlns.sony.net/e-book/terms/" label="{section_title}"/>
  <dcterms:extent xsi:type="prs:word-count">{word_count}</dcterms:extent>
  <dc:type xsi:type="prs:datatype">newspaper/article</dc:type>
  <dcterms:isReferencedBy rdf:resource="#{section_title}"/>
 </entry>
 '''
 def sony_metadata(oeb):
    m = oeb.metadata
    title = short_title = unicode(m.title[0])
    publisher = __appname__ + ' ' + __version__
    for k, n in m.title[0].attrib.items():
        if k.endswith('file-as'):
            short_title = n
    try:
        date = unicode(m.date[0]).split('T')[0]
    except:
        date = strftime('%Y-%m-%d')
    try:
        language = unicode(m.language[0]).replace('_', '-')
    except:
        language = 'en'
    short_title = xml(short_title, True)
    metadata = SONY_METADATA.format(title=xml(title),
            short_title=short_title,
            publisher=xml(publisher), issue_date=xml(date),
            language=xml(language))
    updated = strftime('%Y-%m-%dT%H:%M:%SZ')
    def cal_id(x):
        for k, v in x.attrib.items():
            if k.endswith('scheme') and v == 'uuid':
                return True
    try:
        base_id = unicode(list(filter(cal_id, m.identifier))[0])
    except:
        base_id = str(uuid4())
    entries = []
    seen_titles = set([])
    for i, section in enumerate(oeb.toc):
        if not section.href:
            continue
        secid = 'section%d'%i
        sectitle = section.title
        if not sectitle:
            sectitle = _('Unknown')
        d = 1
        bsectitle = sectitle
        while sectitle in seen_titles:
            sectitle = bsectitle + ' ' + str(d)
            d += 1
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = section.description
        if not secdesc:
            secdesc = ''
        secdesc = xml(secdesc)
        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
            href=section.href, id=xml(base_id)+'/'+secid,
            short_title=short_title, desc=secdesc, updated=updated))
        for j, article in enumerate(section):
            if not article.href:
                continue
            atitle = article.title
            btitle = atitle
            d = 1
            while atitle in seen_titles:
                atitle = btitle + ' ' + str(d)
                d += 1
            auth = article.author if article.author else ''
            desc = section.description
            if not desc:
                desc = ''
            aid = 'article%d'%j
            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=desc,
                short_title=short_title,
                section_title=sectitle,
                href=article.href,
                word_count=str(1),
                id=xml(base_id)+'/'+secid+'/'+aid
            ))
    atom = SONY_ATOM.format(short_title=short_title,
            entries='\n\n'.join(entries), updated=updated,
            id=xml(base_id)).encode('utf-8')
    return metadata, atom
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
    ])
    # Records in self.opts.mobi_periodical whether the book is a periodical.
    # After this commit the test is self.is_periodical; when it is true,
    # periodicalize_toc() and check_for_masthead() run before the flag is set
    # to True, otherwise the flag is set to False.
    def check_for_periodical(self):
-        if self.oeb.metadata.publication_type and \
+        if self.is_periodical:
-            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
+            self.periodicalize_toc()
-                self.periodicalize_toc()
+            self.check_for_masthead()
-                self.check_for_masthead()
+            self.opts.mobi_periodical = True
                self.opts.mobi_periodical = True
        else:
            self.opts.mobi_periodical = False
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -1102,6 +1102,7 @@ class BasicNewsRecipe(Recipe):
        if self.output_profile.periodical_date_in_title:
            title += strftime(self.timefmt)
        mi = MetaInformation(title, [__appname__])
        mi.title_sort = self.short_title()
        mi.publisher = __appname__
        mi.author_sort = __appname__
        mi.publication_type = 'periodical:'+self.publication_type