mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Support for the SONY periodical format. Now news downloaded with calibre will appear in the Periodicals section of your SONY and will have the special periodical navigation enabled.
This commit is contained in:
parent
757b8fa4c0
commit
1ebee86c83
@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
for poem in soup.findAll('div', attrs={'class':'poem'}):
|
for poem in soup.findAll('div', attrs={'class':'poem'}):
|
||||||
title = self.tag_to_string(poem.find('h4'))
|
title = self.tag_to_string(poem.find('h4'))
|
||||||
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
|
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
|
||||||
url = 'http://www.theatlantic.com'+poem.find('a')['href']
|
url = poem.find('a')['href']
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = 'http://www.theatlantic.com' + url
|
||||||
self.log('\tFound article:', title, 'at', url)
|
self.log('\tFound article:', title, 'at', url)
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
poems.append({'title':title, 'url':url, 'description':desc,
|
poems.append({'title':title, 'url':url, 'description':desc,
|
||||||
@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
if div is not None:
|
if div is not None:
|
||||||
self.log('Found section: Advice')
|
self.log('Found section: Advice')
|
||||||
title = self.tag_to_string(div.find('h4'))
|
title = self.tag_to_string(div.find('h4'))
|
||||||
url = 'http://www.theatlantic.com'+div.find('a')['href']
|
url = div.find('a')['href']
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = 'http://www.theatlantic.com' + url
|
||||||
desc = self.tag_to_string(div.find('p'))
|
desc = self.tag_to_string(div.find('p'))
|
||||||
self.log('\tFound article:', title, 'at', url)
|
self.log('\tFound article:', title, 'at', url)
|
||||||
self.log('\t\t', desc)
|
self.log('\t\t', desc)
|
||||||
|
@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
|
|||||||
'''
|
'''
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@property
def is_periodical(self):
    '''
    Whether the book being converted is a periodical.

    A book counts as a periodical when the first ``publication_type`` entry
    of its OEB metadata starts with ``periodical:``.

    :return: Always a real ``bool``. Missing or empty ``publication_type``
        metadata yields ``False`` instead of handing the empty metadata
        value itself back to the caller.
    '''
    publication_type = self.oeb.metadata.publication_type
    if not publication_type:
        return False
    return unicode(publication_type[0]).startswith('periodical:')
|
||||||
|
|
||||||
|
@ -15,22 +15,30 @@ def rules(stylesheets):
|
|||||||
if r.type == r.STYLE_RULE:
|
if r.type == r.STYLE_RULE:
|
||||||
yield r
|
yield r
|
||||||
|
|
||||||
def initialize_container(path_to_container, opf_name='metadata.opf',
        extra_entries=()):
    '''
    Create an empty EPUB document, with a default skeleton.

    The skeleton consists of the uncompressed ``mimetype`` entry, the
    ``META-INF/`` directory and a ``META-INF/container.xml`` that lists the
    OPF file and every extra entry as a rootfile.

    :param path_to_container: Location of the EPUB file to create.
    :param opf_name: Path of the OPF file inside the container.
    :param extra_entries: Iterable of ``(path, mimetype, data)`` tuples. Each
        entry is registered as an additional rootfile in container.xml and
        ``data`` is stored in the container at ``path``. ``None`` is treated
        like an empty iterable.
    :return: The ZipFile that was created. It is returned still open so that
        the caller can add further entries to it.
    '''
    from xml.sax.saxutils import escape

    def attr(value):
        # The values end up inside double quoted XML attributes
        return escape(value, {'"': '&quot;'})

    # Materialise once: the entries are walked twice (first for
    # container.xml, then for the payloads) and the caller may have passed a
    # one-shot iterator.
    extra_entries = list(extra_entries or ())

    rootfiles = u''.join(
        u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
            attr(path), attr(mimetype))
        for path, mimetype, data in extra_entries)
    CONTAINER = u'''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
{extra_entries}
</rootfiles>
</container>
'''.format(attr(opf_name), extra_entries=rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    # The mimetype entry is written first and stored uncompressed
    zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
    # 0o700 is the same value as the old style literal 0700, spelled so that
    # it is valid on Python 2.6+ as well as Python 3
    zf.writestr('META-INF/', '', 0o700)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, mimetype, data in extra_entries:
        zf.writestr(path, data)
    return zf
|
||||||
|
|
||||||
|
|
||||||
|
@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
|
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def workaround_webkit_quirks(self): # {{{
|
def workaround_webkit_quirks(self): # {{{
|
||||||
from calibre.ebooks.oeb.base import XPath
|
from calibre.ebooks.oeb.base import XPath
|
||||||
for x in self.oeb.spine:
|
for x in self.oeb.spine:
|
||||||
@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
|
|
||||||
with TemporaryDirectory('_epub_output') as tdir:
|
with TemporaryDirectory('_epub_output') as tdir:
|
||||||
from calibre.customize.ui import plugin_for_output_format
|
from calibre.customize.ui import plugin_for_output_format
|
||||||
|
metadata_xml = None
|
||||||
|
extra_entries = []
|
||||||
|
if self.is_periodical:
|
||||||
|
from calibre.ebooks.epub.periodical import sony_metadata
|
||||||
|
metadata_xml, atom_xml = sony_metadata(oeb)
|
||||||
|
extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
|
||||||
oeb_output = plugin_for_output_format('oeb')
|
oeb_output = plugin_for_output_format('oeb')
|
||||||
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
|
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
|
||||||
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
|
||||||
@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
|
||||||
|
|
||||||
from calibre.ebooks.epub import initialize_container
|
from calibre.ebooks.epub import initialize_container
|
||||||
epub = initialize_container(output_path, os.path.basename(opf))
|
epub = initialize_container(output_path, os.path.basename(opf),
|
||||||
|
extra_entries=extra_entries)
|
||||||
epub.add_dir(tdir)
|
epub.add_dir(tdir)
|
||||||
if encryption is not None:
|
if encryption is not None:
|
||||||
epub.writestr('META-INF/encryption.xml', encryption)
|
epub.writestr('META-INF/encryption.xml', encryption)
|
||||||
|
if metadata_xml is not None:
|
||||||
|
epub.writestr('META-INF/metadata.xml',
|
||||||
|
metadata_xml.encode('utf-8'))
|
||||||
if opts.extract_to is not None:
|
if opts.extract_to is not None:
|
||||||
if os.path.exists(opts.extract_to):
|
if os.path.exists(opts.extract_to):
|
||||||
shutil.rmtree(opts.extract_to)
|
shutil.rmtree(opts.extract_to)
|
||||||
|
170
src/calibre/ebooks/epub/periodical.py
Normal file
170
src/calibre/ebooks/epub/periodical.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
from calibre import strftime, prepare_string_for_xml as xml
|
||||||
|
|
||||||
|
# Template for the RDF metadata document of a SONY periodical; the EPUB
# output plugin stores the result as META-INF/metadata.xml.
# Filled in with str.format(). Placeholders: title, publisher, short_title,
# issue_date, language. Values are substituted verbatim, so they must
# already be XML escaped.
SONY_METADATA = u'''\
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:prs="http://xmlns.sony.net/e-book/prs/">
|
||||||
|
<rdf:Description rdf:about="">
|
||||||
|
<dc:title>{title}</dc:title>
|
||||||
|
<dc:publisher>{publisher}</dc:publisher>
|
||||||
|
<dcterms:alternative>{short_title}</dcterms:alternative>
|
||||||
|
<dcterms:issued>{issue_date}</dcterms:issued>
|
||||||
|
<dc:language>{language}</dc:language>
|
||||||
|
<dcterms:conformsTo rdf:resource="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0"/>
|
||||||
|
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/newspaper"/>
|
||||||
|
<dcterms:type rdf:resource="http://xmlns.sony.net/e-book/prs/datatype/periodical"/>
|
||||||
|
</rdf:Description>
|
||||||
|
</rdf:RDF>
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Template for the Atom feed that describes the sections and articles of the
# periodical; the EPUB output plugin stores the result as atom.xml.
# Placeholders: short_title, updated, id and entries (the already rendered
# SONY_ATOM_SECTION / SONY_ATOM_ENTRY blocks).
SONY_ATOM = u'''\
|
||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:dcterms="http://purl.org/dc/terms/"
|
||||||
|
xmlns:prs="http://xmlns.sony.net/e-book/prs/"
|
||||||
|
xmlns:media="http://video.search.yahoo.com/mrss"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||||
|
|
||||||
|
<title>{short_title}</title>
|
||||||
|
<updated>{updated}</updated>
|
||||||
|
<id>{id}</id>
|
||||||
|
{entries}
|
||||||
|
</feed>
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Template for one section entry of the feed.
# Placeholders: title, href, id, updated, desc, short_title. The title is
# also used as the rdf:ID of the entry and inside attribute values, so it
# has to be escaped for attribute use.
SONY_ATOM_SECTION = u'''\
|
||||||
|
<entry rdf:ID="{title}">
|
||||||
|
<title>{title}</title>
|
||||||
|
<link href="{href}"/>
|
||||||
|
<id>{id}</id>
|
||||||
|
<updated>{updated}</updated>
|
||||||
|
<summary>{desc}</summary>
|
||||||
|
<category term="{short_title}/{title}"
|
||||||
|
scheme="http://xmlns.sony.net/e-book/terms/" label="{title}"/>
|
||||||
|
<dc:type xsi:type="prs:datatype">newspaper/section</dc:type>
|
||||||
|
<dcterms:isReferencedBy rdf:resource=""/>
|
||||||
|
</entry>
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Template for one article entry of the feed.
# Placeholders: title, author, href, id, updated, desc, short_title,
# section_title, word_count. The article points back at its section through
# rdf:resource="#{section_title}", which matches the rdf:ID that
# SONY_ATOM_SECTION gives the section entry.
SONY_ATOM_ENTRY = u'''\
|
||||||
|
<entry>
|
||||||
|
<title>{title}</title>
|
||||||
|
<author><name>{author}</name></author>
|
||||||
|
<link href="{href}"/>
|
||||||
|
<id>{id}</id>
|
||||||
|
<updated>{updated}</updated>
|
||||||
|
<summary>{desc}</summary>
|
||||||
|
<category term="{short_title}/{section_title}"
|
||||||
|
scheme="http://xmlns.sony.net/e-book/terms/" label="{section_title}"/>
|
||||||
|
<dcterms:extent xsi:type="prs:word-count">{word_count}</dcterms:extent>
|
||||||
|
<dc:type xsi:type="prs:datatype">newspaper/article</dc:type>
|
||||||
|
<dcterms:isReferencedBy rdf:resource="#{section_title}"/>
|
||||||
|
</entry>
|
||||||
|
'''
|
||||||
|
|
||||||
|
def _unique_title(title, seen_titles):
    '''
    Return ``title`` unchanged if it is not in ``seen_titles``, otherwise the
    first of ``title + ' 1'``, ``title + ' 2'``, ... that is not.
    '''
    candidate, counter = title, 1
    while candidate in seen_titles:
        candidate = title + ' ' + str(counter)
        counter += 1
    return candidate


def sony_metadata(oeb):
    '''
    Generate the extra documents needed to present a book as a periodical
    on SONY readers.

    The top level entries of ``oeb.toc`` are treated as sections and their
    children as articles. Sections and articles without an href are left
    out of the feed.

    :param oeb: The OEB book. Its ``metadata`` (title, date, language,
        identifier) and ``toc`` are read.
    :return: ``(metadata, atom)``. ``metadata`` is the filled in
        SONY_METADATA template as unicode text (not yet encoded), ``atom`` is
        the filled in SONY_ATOM feed encoded as UTF-8.
    '''
    m = oeb.metadata
    title = short_title = unicode(m.title[0])
    publisher = __appname__ + ' ' + __version__
    # A file-as attribute on the title, if present, provides the short title
    for k, n in m.title[0].attrib.items():
        if k.endswith('file-as'):
            short_title = n
    try:
        # Keep only the date part of an ISO style timestamp
        date = unicode(m.date[0]).split('T')[0]
    except Exception:
        # No usable date in the metadata
        date = strftime('%Y-%m-%d')
    try:
        language = unicode(m.language[0]).replace('_', '-')
    except Exception:
        language = 'en'
    # Escaped for attribute use, since the short title is placed in element
    # text as well as in attribute values
    short_title = xml(short_title, True)

    metadata = SONY_METADATA.format(title=xml(title),
            short_title=short_title,
            publisher=xml(publisher), issue_date=xml(date),
            language=xml(language))

    # NOTE(review): the trailing Z marks the timestamp as UTC -- confirm that
    # strftime() does not format local time when called without a time.
    updated = strftime('%Y-%m-%dT%H:%M:%SZ')

    def is_uuid_identifier(x):
        return any(k.endswith('scheme') and v == 'uuid'
                for k, v in x.attrib.items())

    try:
        base_id = unicode(
                [x for x in m.identifier if is_uuid_identifier(x)][0])
    except Exception:
        # No uuid identifier in the metadata, make one up
        base_id = str(uuid4())
    base_id = xml(base_id)

    entries = []
    seen_titles = set()
    for i, section in enumerate(oeb.toc):
        if not section.href:
            continue
        secid = 'section%d'%i
        # Section titles double as the rdf:ID of the section entry, so they
        # have to be unique within the feed
        sectitle = _unique_title(section.title or _('Unknown'), seen_titles)
        seen_titles.add(sectitle)
        sectitle = xml(sectitle, True)
        secdesc = xml(section.description or '')
        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
            href=xml(section.href, True), id=base_id+'/'+secid,
            short_title=short_title, desc=secdesc, updated=updated))

        for j, article in enumerate(section):
            if not article.href:
                continue
            # Only clashes with section titles are avoided here, article
            # titles are (as before) not recorded in seen_titles
            atitle = _unique_title(article.title or _('Unknown'),
                    seen_titles)
            auth = article.author if article.author else ''
            # The summary of an article is its own description, escaped
            # like every other value that is placed in element text
            desc = xml(article.description or '')
            aid = 'article%d'%j

            entries.append(SONY_ATOM_ENTRY.format(
                title=xml(atitle),
                author=xml(auth),
                updated=updated,
                desc=desc,
                short_title=short_title,
                section_title=sectitle,
                href=xml(article.href, True),
                word_count=str(1),
                id=base_id+'/'+secid+'/'+aid
                ))

    atom = SONY_ATOM.format(short_title=short_title,
            entries='\n\n'.join(entries), updated=updated,
            id=base_id).encode('utf-8')

    return metadata, atom
|
||||||
|
|
@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
])
|
])
|
||||||
|
|
||||||
def check_for_periodical(self):
    '''
    Record in ``self.opts.mobi_periodical`` whether the book is a periodical.

    For a periodical the TOC is periodicalized and the masthead is checked
    before the flag is set to ``True``; for any other book the flag is simply
    set to ``False``.
    '''
    if not self.is_periodical:
        self.opts.mobi_periodical = False
        return
    # The flag is only set once both preparation steps have run
    self.periodicalize_toc()
    self.check_for_masthead()
    self.opts.mobi_periodical = True
|
||||||
|
|
||||||
|
@ -1102,6 +1102,7 @@ class BasicNewsRecipe(Recipe):
|
|||||||
if self.output_profile.periodical_date_in_title:
|
if self.output_profile.periodical_date_in_title:
|
||||||
title += strftime(self.timefmt)
|
title += strftime(self.timefmt)
|
||||||
mi = MetaInformation(title, [__appname__])
|
mi = MetaInformation(title, [__appname__])
|
||||||
|
mi.title_sort = self.short_title()
|
||||||
mi.publisher = __appname__
|
mi.publisher = __appname__
|
||||||
mi.author_sort = __appname__
|
mi.author_sort = __appname__
|
||||||
mi.publication_type = 'periodical:'+self.publication_type
|
mi.publication_type = 'periodical:'+self.publication_type
|
||||||
|
Loading…
x
Reference in New Issue
Block a user