Support for the SONY periodical format. Now news downloaded with calibre will appear in the Periodicals section of your SONY and will have the special periodical navigation enabled.

This commit is contained in:
Kovid Goyal 2010-10-18 20:02:32 -06:00
parent 757b8fa4c0
commit 1ebee86c83
7 changed files with 210 additions and 12 deletions

View File

@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
for poem in soup.findAll('div', attrs={'class':'poem'}): for poem in soup.findAll('div', attrs={'class':'poem'}):
title = self.tag_to_string(poem.find('h4')) title = self.tag_to_string(poem.find('h4'))
desc = self.tag_to_string(poem.find(attrs={'class':'author'})) desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
url = 'http://www.theatlantic.com'+poem.find('a')['href'] url = poem.find('a')['href']
if url.startswith('/'):
url = 'http://www.theatlantic.com' + url
self.log('\tFound article:', title, 'at', url) self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc) self.log('\t\t', desc)
poems.append({'title':title, 'url':url, 'description':desc, poems.append({'title':title, 'url':url, 'description':desc,
@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
if div is not None: if div is not None:
self.log('Found section: Advice') self.log('Found section: Advice')
title = self.tag_to_string(div.find('h4')) title = self.tag_to_string(div.find('h4'))
url = 'http://www.theatlantic.com'+div.find('a')['href'] url = div.find('a')['href']
if url.startswith('/'):
url = 'http://www.theatlantic.com' + url
desc = self.tag_to_string(div.find('p')) desc = self.tag_to_string(div.find('p'))
self.log('\tFound article:', title, 'at', url) self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc) self.log('\t\t', desc)

View File

@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
''' '''
raise NotImplementedError raise NotImplementedError
@property
def is_periodical(self):
    '''
    Whether the book currently being converted is a periodical.

    Reads ``self.oeb.metadata.publication_type`` and reports True only when
    it is non-empty and its first entry, converted to unicode, starts with
    ``periodical:``.

    :return: Always a real ``bool`` (the previous ``x and y`` form leaked the
        raw falsy ``publication_type`` value to callers).
    '''
    pub_type = self.oeb.metadata.publication_type
    return bool(pub_type) and \
        unicode(pub_type[0]).startswith('periodical:')

View File

@ -15,22 +15,30 @@ def rules(stylesheets):
if r.type == r.STYLE_RULE: if r.type == r.STYLE_RULE:
yield r yield r
def initialize_container(path_to_container, opf_name='metadata.opf'): def initialize_container(path_to_container, opf_name='metadata.opf',
extra_entries=[]):
''' '''
Create an empty EPUB document, with a default skeleton. Create an empty EPUB document, with a default skeleton.
''' '''
CONTAINER='''\ rootfiles = ''
for path, mimetype, _ in extra_entries:
rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
path, mimetype)
CONTAINER = u'''\
<?xml version="1.0"?> <?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles> <rootfiles>
<rootfile full-path="%s" media-type="application/oebps-package+xml"/> <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
{extra_entries}
</rootfiles> </rootfiles>
</container> </container>
'''%opf_name '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
zf = ZipFile(path_to_container, 'w') zf = ZipFile(path_to_container, 'w')
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED) zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
zf.writestr('META-INF/', '', 0700) zf.writestr('META-INF/', '', 0700)
zf.writestr('META-INF/container.xml', CONTAINER) zf.writestr('META-INF/container.xml', CONTAINER)
for path, _, data in extra_entries:
zf.writestr(path, data)
return zf return zf

View File

@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)]) recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
def workaround_webkit_quirks(self): # {{{ def workaround_webkit_quirks(self): # {{{
from calibre.ebooks.oeb.base import XPath from calibre.ebooks.oeb.base import XPath
for x in self.oeb.spine: for x in self.oeb.spine:
@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin):
with TemporaryDirectory('_epub_output') as tdir: with TemporaryDirectory('_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format from calibre.customize.ui import plugin_for_output_format
metadata_xml = None
extra_entries = []
if self.is_periodical:
from calibre.ebooks.epub.periodical import sony_metadata
metadata_xml, atom_xml = sony_metadata(oeb)
extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
oeb_output = plugin_for_output_format('oeb') oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, tdir, input_plugin, opts, log) oeb_output.convert(oeb, tdir, input_plugin, opts, log)
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0] opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin):
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid) encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
from calibre.ebooks.epub import initialize_container from calibre.ebooks.epub import initialize_container
epub = initialize_container(output_path, os.path.basename(opf)) epub = initialize_container(output_path, os.path.basename(opf),
extra_entries=extra_entries)
epub.add_dir(tdir) epub.add_dir(tdir)
if encryption is not None: if encryption is not None:
epub.writestr('META-INF/encryption.xml', encryption) epub.writestr('META-INF/encryption.xml', encryption)
if metadata_xml is not None:
epub.writestr('META-INF/metadata.xml',
metadata_xml.encode('utf-8'))
if opts.extract_to is not None: if opts.extract_to is not None:
if os.path.exists(opts.extract_to): if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to) shutil.rmtree(opts.extract_to)

View File

@ -0,0 +1,170 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from uuid import uuid4
from calibre.constants import __appname__, __version__
from calibre import strftime, prepare_string_for_xml as xml
# RDF description of the periodical as a whole. sony_metadata() fills in
# {title}, {publisher}, {short_title}, {issue_date} and {language}; all
# values are XML escaped by the caller before substitution. The EPUB output
# plugin writes the result to META-INF/metadata.xml.
SONY_METADATA = u'''\
<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:prs="http://xmlns.sony.net/e-book/prs/">
<rdf:Description rdf:about="">
<dc:title>{title}</dc:title>
<dc:publisher>{publisher}</dc:publisher>
<dcterms:alternative>{short_title}</dcterms:alternative>
<dcterms:issued>{issue_date}</dcterms:issued>
<dc:language>{language}</dc:language>
<dcterms:conformsTo rdf:resource="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0"/>
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/newspaper"/>
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/periodical"/>
</rdf:Description>
</rdf:RDF>
'''
# Outer Atom feed that wraps every section and article entry. {entries} is
# the already formatted SONY_ATOM_SECTION / SONY_ATOM_ENTRY blocks joined
# together; {short_title}, {updated} and {id} are filled in by
# sony_metadata(). The EPUB output plugin stores the result as atom.xml.
SONY_ATOM = u'''\
<?xml version="1.0" encoding="utf-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:prs="http://xmlns.sony.net/e-book/prs/"
xmlns:media="http://video.search.yahoo.com/mrss"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<title>{short_title}</title>
<updated>{updated}</updated>
<id>{id}</id>
{entries}
</feed>
'''
# One feed entry per section. {title} is used both as element text and as
# the rdf:ID attribute value, so the caller must pass it attribute-escaped
# and unique within the feed (article entries point back at it via
# rdf:resource="#<title>").
SONY_ATOM_SECTION = u'''\
<entry rdf:ID="{title}">
<title>{title}</title>
<link href="{href}"/>
<id>{id}</id>
<updated>{updated}</updated>
<summary>{desc}</summary>
<category term="{short_title}/{title}"
scheme="http://xmlns.sony.net/e-book/terms/" label="{title}"/>
<dc:type xsi:type="prs:datatype">newspaper/section</dc:type>
<dcterms:isReferencedBy rdf:resource=""/>
</entry>
'''
# One feed entry per article. {section_title} must be exactly the value that
# was used for the owning section's rdf:ID, since it is referenced here as
# rdf:resource="#{section_title}".
SONY_ATOM_ENTRY = u'''\
<entry>
<title>{title}</title>
<author><name>{author}</name></author>
<link href="{href}"/>
<id>{id}</id>
<updated>{updated}</updated>
<summary>{desc}</summary>
<category term="{short_title}/{section_title}"
scheme="http://xmlns.sony.net/e-book/terms/" label="{section_title}"/>
<dcterms:extent xsi:type="prs:word-count">{word_count}</dcterms:extent>
<dc:type xsi:type="prs:datatype">newspaper/article</dc:type>
<dcterms:isReferencedBy rdf:resource="#{section_title}"/>
</entry>
'''
def sony_metadata(oeb):
    '''
    Build the two XML documents that describe a book as a SONY periodical.

    The top level of ``oeb.toc`` is treated as the list of sections and the
    children of each section as its articles. TOC nodes without an href are
    skipped.

    :param oeb: The OEB book being converted. Only ``oeb.metadata`` (title,
        date, language, identifier) and ``oeb.toc`` are read.
    :return: A 2-tuple ``(metadata, atom)``. ``metadata`` is the filled in
        ``SONY_METADATA`` template as a unicode string, ``atom`` is the
        filled in ``SONY_ATOM`` feed encoded as UTF-8 bytes.
    '''
    m = oeb.metadata
    title = short_title = unicode(m.title[0])
    publisher = __appname__ + ' ' + __version__
    # Prefer the title's file-as attribute, when present, as the short title
    for k, n in m.title[0].attrib.items():
        if k.endswith('file-as'):
            short_title = n

    # Missing or unusable date/language metadata falls back to today/'en'.
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        date = unicode(m.date[0]).split('T')[0]
    except Exception:
        date = strftime('%Y-%m-%d')
    try:
        language = unicode(m.language[0]).replace('_', '-')
    except Exception:
        language = 'en'

    # Escaped for attribute use, as it ends up inside category term="..."
    short_title = xml(short_title, True)

    metadata = SONY_METADATA.format(title=xml(title),
            short_title=short_title,
            publisher=xml(publisher), issue_date=xml(date),
            language=xml(language))

    # NOTE(review): the trailing Z claims UTC; confirm that strftime() is
    # not formatting local time here.
    updated = strftime('%Y-%m-%dT%H:%M:%SZ')

    def cal_id(x):
        # True for an identifier whose scheme attribute is 'uuid'
        for k, v in x.attrib.items():
            if k.endswith('scheme') and v == 'uuid':
                return True
        return False

    try:
        base_id = unicode([x for x in m.identifier if cal_id(x)][0])
    except Exception:
        # No usable uuid identifier, make one up
        base_id = str(uuid4())
    base_id = xml(base_id)

    entries = []
    seen_titles = set()
    for i, section in enumerate(oeb.toc):
        if not section.href:
            continue
        secid = 'section%d'%i
        sectitle = section.title or _('Unknown')
        # Section titles are used as rdf:ID values, so they must be unique
        bsectitle = sectitle
        d = 1
        while sectitle in seen_titles:
            sectitle = bsectitle + ' ' + str(d)
            d += 1
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = xml(section.description or '')
        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
            href=xml(section.href, True), id=base_id+'/'+secid,
            short_title=short_title, desc=secdesc, updated=updated))

        for j, article in enumerate(section):
            if not article.href:
                continue
            # A missing title would otherwise be passed to xml() as None
            atitle = article.title or _('Unknown')
            # Only guards against a clash with the section titles seen so
            # far; article titles themselves are never added to seen_titles
            btitle = atitle
            d = 1
            while atitle in seen_titles:
                atitle = btitle + ' ' + str(d)
                d += 1

            auth = article.author or ''
            # Use the article's own description (previously the section's
            # description was repeated for every article) and escape it so
            # that markup characters cannot break the feed
            desc = xml(article.description or '')
            aid = 'article%d'%j

            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=desc,
                short_title=short_title,
                section_title=sectitle,
                href=xml(article.href, True),
                # Placeholder, the real word count is not computed
                word_count=str(1),
                id=base_id+'/'+secid+'/'+aid
                ))

    atom = SONY_ATOM.format(short_title=short_title,
            entries='\n\n'.join(entries), updated=updated,
            id=base_id).encode('utf-8')

    return metadata, atom

View File

@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
]) ])
def check_for_periodical(self): def check_for_periodical(self):
if self.oeb.metadata.publication_type and \ if self.is_periodical:
unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'): self.periodicalize_toc()
self.periodicalize_toc() self.check_for_masthead()
self.check_for_masthead() self.opts.mobi_periodical = True
self.opts.mobi_periodical = True
else: else:
self.opts.mobi_periodical = False self.opts.mobi_periodical = False

View File

@ -1102,6 +1102,7 @@ class BasicNewsRecipe(Recipe):
if self.output_profile.periodical_date_in_title: if self.output_profile.periodical_date_in_title:
title += strftime(self.timefmt) title += strftime(self.timefmt)
mi = MetaInformation(title, [__appname__]) mi = MetaInformation(title, [__appname__])
mi.title_sort = self.short_title()
mi.publisher = __appname__ mi.publisher = __appname__
mi.author_sort = __appname__ mi.author_sort = __appname__
mi.publication_type = 'periodical:'+self.publication_type mi.publication_type = 'periodical:'+self.publication_type