diff --git a/resources/recipes/atlantic.recipe b/resources/recipes/atlantic.recipe
index a41a931e37..5ae0f7d993 100644
--- a/resources/recipes/atlantic.recipe
+++ b/resources/recipes/atlantic.recipe
@@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
for poem in soup.findAll('div', attrs={'class':'poem'}):
title = self.tag_to_string(poem.find('h4'))
desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
- url = 'http://www.theatlantic.com'+poem.find('a')['href']
+ url = poem.find('a')['href']
+ if url.startswith('/'):
+ url = 'http://www.theatlantic.com' + url
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)
poems.append({'title':title, 'url':url, 'description':desc,
@@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
if div is not None:
self.log('Found section: Advice')
title = self.tag_to_string(div.find('h4'))
- url = 'http://www.theatlantic.com'+div.find('a')['href']
+ url = div.find('a')['href']
+ if url.startswith('/'):
+ url = 'http://www.theatlantic.com' + url
desc = self.tag_to_string(div.find('p'))
self.log('\tFound article:', title, 'at', url)
self.log('\t\t', desc)
diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py
index c36f83bd2f..ec83600a49 100644
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@@ -294,3 +294,8 @@ class OutputFormatPlugin(Plugin):
'''
raise NotImplementedError
+    @property
+    def is_periodical(self):
+        '''
+        Truthy when the first ``publication_type`` entry in the metadata of
+        ``self.oeb`` starts with ``periodical:``. When there is no
+        publication type at all, the (falsy) metadata value itself is
+        returned.
+        '''
+        pub_types = self.oeb.metadata.publication_type
+        if not pub_types:
+            # Hand back the falsy value unchanged rather than coercing it
+            return pub_types
+        first = unicode(pub_types[0])
+        return first.startswith('periodical:')
+
diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py
index f5de8421e0..53dd01d625 100644
--- a/src/calibre/ebooks/epub/__init__.py
+++ b/src/calibre/ebooks/epub/__init__.py
@@ -15,22 +15,30 @@ def rules(stylesheets):
if r.type == r.STYLE_RULE:
yield r
-def initialize_container(path_to_container, opf_name='metadata.opf'):
+def initialize_container(path_to_container, opf_name='metadata.opf',
+ extra_entries=[]):
'''
Create an empty EPUB document, with a default skeleton.
'''
- CONTAINER='''\
+ # Each item of extra_entries is a (path, mimetype, data) tuple: path and
+ # mimetype are formatted into the container XML built below, data is
+ # written into the zip file under path once the skeleton exists.
+ # NOTE(review): the [] default is shared between calls; that is only safe
+ # because extra_entries is never mutated in this function.
+ rootfiles = ''
+ for path, mimetype, _ in extra_entries:
+ rootfiles += u''.format(
+ path, mimetype)
+ CONTAINER = u'''\
-
+
+ {extra_entries}
- '''%opf_name
+ '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
zf = ZipFile(path_to_container, 'w')
zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
zf.writestr('META-INF/', '', 0700)
zf.writestr('META-INF/container.xml', CONTAINER)
+ # Store the payload of every extra entry under its own path
+ for path, _, data in extra_entries:
+ zf.writestr(path, data)
return zf
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index 4146031cd2..38820010a8 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
+
def workaround_webkit_quirks(self): # {{{
from calibre.ebooks.oeb.base import XPath
for x in self.oeb.spine:
@@ -183,6 +184,12 @@ class EPUBOutput(OutputFormatPlugin):
with TemporaryDirectory('_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
+ metadata_xml = None
+ extra_entries = []
+ if self.is_periodical:
+ from calibre.ebooks.epub.periodical import sony_metadata
+ metadata_xml, atom_xml = sony_metadata(oeb)
+ extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
oeb_output = plugin_for_output_format('oeb')
oeb_output.convert(oeb, tdir, input_plugin, opts, log)
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
@@ -194,10 +201,14 @@ class EPUBOutput(OutputFormatPlugin):
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
from calibre.ebooks.epub import initialize_container
- epub = initialize_container(output_path, os.path.basename(opf))
+ epub = initialize_container(output_path, os.path.basename(opf),
+ extra_entries=extra_entries)
epub.add_dir(tdir)
if encryption is not None:
epub.writestr('META-INF/encryption.xml', encryption)
+ if metadata_xml is not None:
+ epub.writestr('META-INF/metadata.xml',
+ metadata_xml.encode('utf-8'))
if opts.extract_to is not None:
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)
diff --git a/src/calibre/ebooks/epub/periodical.py b/src/calibre/ebooks/epub/periodical.py
new file mode 100644
index 0000000000..c68dc9e272
--- /dev/null
+++ b/src/calibre/ebooks/epub/periodical.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from uuid import uuid4
+
+from calibre.constants import __appname__, __version__
+from calibre import strftime, prepare_string_for_xml as xml
+
+# Metadata template: sony_metadata() fills in title, publisher, short_title,
+# issue_date and language.
+SONY_METADATA = u'''\
+
+
+
+ {title}
+ {publisher}
+ {short_title}
+ {issue_date}
+ {language}
+
+
+
+
+
+'''
+
+# Outer feed template: {entries} receives the rendered section and article
+# entries, joined by blank lines in sony_metadata().
+SONY_ATOM = u'''\
+
+
+
+{short_title}
+{updated}
+{id}
+{entries}
+
+'''
+
+# Rendered once for every table of contents section that has an href.
+SONY_ATOM_SECTION = u'''\
+
+ {title}
+
+ {id}
+ {updated}
+ {desc}
+
+ newspaper/section
+
+
+'''
+
+# Rendered once for every article (child of a section) that has an href.
+SONY_ATOM_ENTRY = u'''\
+
+ {title}
+ {author}
+
+ {id}
+ {updated}
+ {desc}
+
+ {word_count}
+ newspaper/article
+
+
+'''
+
+def sony_metadata(oeb):
+    '''
+    Render the SONY_METADATA and SONY_ATOM templates for the periodical in
+    `oeb`.
+
+    Every top-level entry of ``oeb.toc`` that has an href becomes a section
+    entry, and each of its children that has an href becomes an article
+    entry. Entries without an href are skipped.
+
+    :param oeb: The book. Its ``metadata`` (title, date, language,
+                identifier) and its ``toc`` are read.
+    :return: ``(metadata, atom)``. `metadata` is a unicode string, `atom`
+             has already been encoded as UTF-8.
+    '''
+    m = oeb.metadata
+    title = short_title = unicode(m.title[0])
+    publisher = __appname__ + ' ' + __version__
+    # A file-as attribute on the title, when present, supplies the short title
+    for k, n in m.title[0].attrib.items():
+        if k.endswith('file-as'):
+            short_title = n
+    try:
+        # Keep only the part in front of the "T" of the timestamp
+        date = unicode(m.date[0]).split('T')[0]
+    except Exception:
+        # Missing or unusable date: fall back to strftime's default time
+        date = strftime('%Y-%m-%d')
+    try:
+        language = unicode(m.language[0]).replace('_', '-')
+    except Exception:
+        language = 'en'
+    short_title = xml(short_title, True)
+
+    metadata = SONY_METADATA.format(title=xml(title),
+            short_title=short_title,
+            publisher=xml(publisher), issue_date=xml(date),
+            language=xml(language))
+
+    updated = strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    def cal_id(x):
+        # True for an identifier with a "...scheme" attribute equal to uuid
+        for k, v in x.attrib.items():
+            if k.endswith('scheme') and v == 'uuid':
+                return True
+
+    try:
+        base_id = unicode(list(filter(cal_id, m.identifier))[0])
+    except Exception:
+        # No uuid identifier in the metadata, generate a fresh one
+        base_id = str(uuid4())
+    # Escaped once here, it prefixes the id of every entry
+    base_id_xml = xml(base_id)
+
+    entries = []
+    seen_titles = set([])
+    for i, section in enumerate(oeb.toc):
+        if not section.href:
+            continue
+        secid = 'section%d'%i
+        sectitle = section.title
+        if not sectitle:
+            sectitle = _('Unknown')
+        # Make section titles unique by appending a running number
+        d = 1
+        bsectitle = sectitle
+        while sectitle in seen_titles:
+            sectitle = bsectitle + ' ' + str(d)
+            d += 1
+        seen_titles.add(sectitle)
+        sectitle = xml(sectitle, True)
+        secdesc = section.description
+        if not secdesc:
+            secdesc = ''
+        secdesc = xml(secdesc)
+        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
+            href=section.href, id=base_id_xml+'/'+secid,
+            short_title=short_title, desc=secdesc, updated=updated))
+
+        for j, article in enumerate(section):
+            if not article.href:
+                continue
+            atitle = article.title
+            btitle = atitle
+            d = 1
+            # NOTE(review): article titles are never added to seen_titles,
+            # so this only renames an article whose title equals an already
+            # used section title -- confirm that is the intent.
+            while atitle in seen_titles:
+                atitle = btitle + ' ' + str(d)
+                d += 1
+
+            auth = article.author if article.author else ''
+            # Describe the article with its own description (not the one of
+            # the enclosing section) and escape it like every other text
+            # field, so that "&" or "<" cannot break the generated XML.
+            desc = xml(article.description or '')
+            aid = 'article%d'%j
+
+            entries.append(SONY_ATOM_ENTRY.format(
+                title=xml(atitle),
+                author=xml(auth),
+                updated=updated,
+                desc=desc,
+                short_title=short_title,
+                section_title=sectitle,
+                href=article.href,
+                word_count=str(1),
+                id=base_id_xml+'/'+secid+'/'+aid
+                ))
+
+    atom = SONY_ATOM.format(short_title=short_title,
+            entries='\n\n'.join(entries), updated=updated,
+            id=base_id_xml).encode('utf-8')
+
+    return metadata, atom
+
diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py
index 49da18ea7b..4159c6dd40 100644
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
])
def check_for_periodical(self):
- if self.oeb.metadata.publication_type and \
- unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
- self.periodicalize_toc()
- self.check_for_masthead()
- self.opts.mobi_periodical = True
+ if self.is_periodical:
+ self.periodicalize_toc()
+ self.check_for_masthead()
+ self.opts.mobi_periodical = True
else:
self.opts.mobi_periodical = False
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index f3d77061c3..f710b52204 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -1102,6 +1102,7 @@ class BasicNewsRecipe(Recipe):
if self.output_profile.periodical_date_in_title:
title += strftime(self.timefmt)
mi = MetaInformation(title, [__appname__])
+ mi.title_sort = self.short_title()
mi.publisher = __appname__
mi.author_sort = __appname__
mi.publication_type = 'periodical:'+self.publication_type