diff --git a/resources/recipes/atlantic.recipe b/resources/recipes/atlantic.recipe
index a41a931e37..5ae0f7d993 100644
--- a/resources/recipes/atlantic.recipe
+++ b/resources/recipes/atlantic.recipe
@@ -71,7 +71,9 @@ class TheAtlantic(BasicNewsRecipe):
         for poem in soup.findAll('div', attrs={'class':'poem'}):
             title = self.tag_to_string(poem.find('h4'))
             desc = self.tag_to_string(poem.find(attrs={'class':'author'}))
-            url = 'http://www.theatlantic.com'+poem.find('a')['href']
+            url = poem.find('a')['href']
+            if url.startswith('/'):
+                url = 'http://www.theatlantic.com' + url
             self.log('\tFound article:', title, 'at', url)
             self.log('\t\t', desc)
             poems.append({'title':title, 'url':url, 'description':desc,
@@ -83,7 +85,9 @@ class TheAtlantic(BasicNewsRecipe):
         if div is not None:
             self.log('Found section: Advice')
             title = self.tag_to_string(div.find('h4'))
-            url = 'http://www.theatlantic.com'+div.find('a')['href']
+            url = div.find('a')['href']
+            if url.startswith('/'):
+                url = 'http://www.theatlantic.com' + url
             desc = self.tag_to_string(div.find('p'))
             self.log('\tFound article:', title, 'at', url)
             self.log('\t\t', desc)
diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py
index c36f83bd2f..ec83600a49 100644
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@@ -294,3 +294,9 @@ class OutputFormatPlugin(Plugin):
         '''
         raise NotImplementedError
 
+    @property
+    def is_periodical(self):
+        '''True if the book's publication type starts with "periodical:".'''
+        return bool(self.oeb.metadata.publication_type) and \
+            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:')
+
diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py
index f5de8421e0..53dd01d625 100644
--- a/src/calibre/ebooks/epub/__init__.py
+++ b/src/calibre/ebooks/epub/__init__.py
@@ -15,22 +15,33 @@ def rules(stylesheets):
             if r.type == r.STYLE_RULE:
                 yield r
 
-def initialize_container(path_to_container, opf_name='metadata.opf'):
+def initialize_container(path_to_container, opf_name='metadata.opf',
+        extra_entries=()):
     '''
     Create an empty EPUB document, with a default skeleton.
+
+    `extra_entries` is an iterable of (path, mimetype, data) tuples. Each one
+    is listed as an additional rootfile in container.xml and written to the ZIP.
     '''
-    CONTAINER='''\
+    rootfiles = ''
+    for path, mimetype, _ in extra_entries:
+        rootfiles += u'<rootfile full-path="{0}" media-type="{1}"/>'.format(
+                path, mimetype)
+    CONTAINER = u'''\
 <?xml version="1.0"?>
 <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
    <rootfiles>
-      <rootfile full-path="%s" media-type="application/oebps-package+xml"/>
+      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
+      {extra_entries}
    </rootfiles>
 </container>
-    '''%opf_name
+    '''.format(opf_name, extra_entries=rootfiles).encode('utf-8')
     zf = ZipFile(path_to_container, 'w')
     zf.writestr('mimetype', 'application/epub+zip', compression=ZIP_STORED)
     zf.writestr('META-INF/', '', 0700)
     zf.writestr('META-INF/container.xml', CONTAINER)
+    for path, _, data in extra_entries:
+        zf.writestr(path, data)
     return zf
 
 
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index 4146031cd2..38820010a8 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -106,6 +106,7 @@ class EPUBOutput(OutputFormatPlugin):
 
     recommendations = set([('pretty_print', True, OptionRecommendation.HIGH)])
 
+
     def workaround_webkit_quirks(self): # {{{
         from calibre.ebooks.oeb.base import XPath
         for x in self.oeb.spine:
@@ -183,6 +184,13 @@ class EPUBOutput(OutputFormatPlugin):
 
         with TemporaryDirectory('_epub_output') as tdir:
             from calibre.customize.ui import plugin_for_output_format
+            # Periodicals also get a Sony metadata.xml and an atom.xml feed
+            metadata_xml = None
+            extra_entries = []
+            if self.is_periodical:
+                from calibre.ebooks.epub.periodical import sony_metadata
+                metadata_xml, atom_xml = sony_metadata(oeb)
+                extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
             oeb_output = plugin_for_output_format('oeb')
             oeb_output.convert(oeb, tdir, input_plugin, opts, log)
             opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
@@ -194,10 +202,14 @@ class EPUBOutput(OutputFormatPlugin):
                 encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
 
             from calibre.ebooks.epub import initialize_container
-            epub = initialize_container(output_path, os.path.basename(opf))
+            epub = initialize_container(output_path, os.path.basename(opf),
+                    extra_entries=extra_entries)
             epub.add_dir(tdir)
             if encryption is not None:
                 epub.writestr('META-INF/encryption.xml', encryption)
+            if metadata_xml is not None:
+                epub.writestr('META-INF/metadata.xml',
+                        metadata_xml.encode('utf-8'))
             if opts.extract_to is not None:
                 if os.path.exists(opts.extract_to):
                     shutil.rmtree(opts.extract_to)
diff --git a/src/calibre/ebooks/epub/periodical.py b/src/calibre/ebooks/epub/periodical.py
new file mode 100644
index 0000000000..c68dc9e272
--- /dev/null
+++ b/src/calibre/ebooks/epub/periodical.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Kovid Goyal '
+__docformat__ = 'restructuredtext en'
+
+from uuid import uuid4
+
+from calibre.constants import __appname__, __version__
+from calibre import strftime, prepare_string_for_xml as xml
+
+# NOTE(review): the four templates below contain no XML markup, only
+# placeholders and bare text; the markup must be supplied before use.
+SONY_METADATA = u'''\
+
+
+
+ {title}
+ {publisher}
+ {short_title}
+ {issue_date}
+ {language}
+
+
+
+
+
+'''
+
+SONY_ATOM = u'''\
+
+
+
+{short_title}
+{updated}
+{id}
+{entries}
+
+'''
+
+SONY_ATOM_SECTION = u'''\
+
+ {title}
+
+ {id}
+ {updated}
+ {desc}
+
+ newspaper/section
+
+
+'''
+
+SONY_ATOM_ENTRY = u'''\
+
+ {title}
+ {author}
+
+ {id}
+ {updated}
+ {desc}
+
+ {word_count}
+ newspaper/article
+
+
+'''
+
+def sony_metadata(oeb):
+    '''
+    Build the Sony periodical metadata for the book `oeb`.
+
+    Returns a ``(metadata, atom)`` tuple. `metadata` is the unicode text
+    rendered from SONY_METADATA. `atom` is the UTF-8 encoded feed rendered
+    from SONY_ATOM, with one entry for every TOC section that has an href
+    followed by one entry for each of its articles that has an href.
+    '''
+    m = oeb.metadata
+    title = short_title = unicode(m.title[0])
+    publisher = __appname__ + ' ' + __version__
+    # A file-as attribute on the title, when present, supplies the short title
+    for k, n in m.title[0].attrib.items():
+        if k.endswith('file-as'):
+            short_title = n
+    try:
+        date = unicode(m.date[0]).split('T')[0]
+    except Exception:
+        date = strftime('%Y-%m-%d')
+    try:
+        language = unicode(m.language[0]).replace('_', '-')
+    except Exception:
+        language = 'en'
+    short_title = xml(short_title, True)
+
+    metadata = SONY_METADATA.format(title=xml(title),
+            short_title=short_title,
+            publisher=xml(publisher), issue_date=xml(date),
+            language=xml(language))
+
+    updated = strftime('%Y-%m-%dT%H:%M:%SZ')
+
+    def cal_id(x):
+        for k, v in x.attrib.items():
+            if k.endswith('scheme') and v == 'uuid':
+                return True
+
+    # Prefer the identifier whose scheme is uuid, else generate a fresh UUID
+    try:
+        base_id = unicode(list(filter(cal_id, m.identifier))[0])
+    except Exception:
+        base_id = str(uuid4())
+
+    entries = []
+    seen_titles = set([])
+    for i, section in enumerate(oeb.toc):
+        if not section.href:
+            continue
+        secid = 'section%d'%i
+        sectitle = section.title
+        if not sectitle:
+            sectitle = _('Unknown')
+        # Make repeated section titles unique by appending a counter
+        d = 1
+        bsectitle = sectitle
+        while sectitle in seen_titles:
+            sectitle = bsectitle + ' ' + str(d)
+            d += 1
+        seen_titles.add(sectitle)
+        sectitle = xml(sectitle, True)
+        secdesc = section.description
+        if not secdesc:
+            secdesc = ''
+        secdesc = xml(secdesc)
+        entries.append(SONY_ATOM_SECTION.format(title=sectitle,
+            href=section.href, id=xml(base_id)+'/'+secid,
+            short_title=short_title, desc=secdesc, updated=updated))
+
+        for j, article in enumerate(section):
+            if not article.href:
+                continue
+            atitle = article.title
+            btitle = atitle
+            d = 1
+            while atitle in seen_titles:
+                atitle = btitle + ' ' + str(d)
+                d += 1
+
+            auth = article.author if article.author else ''
+            # Summarise the article with its own description, not the section's
+            desc = article.description
+            if not desc:
+                desc = ''
+            aid = 'article%d'%j
+
+            entries.append(SONY_ATOM_ENTRY.format(
+                title=xml(atitle),
+                author=xml(auth),
+                updated=updated,
+                desc=xml(desc),
+                short_title=short_title,
+                section_title=sectitle,
+                href=article.href,
+                word_count=str(1),
+                id=xml(base_id)+'/'+secid+'/'+aid
+                ))
+
+    atom = SONY_ATOM.format(short_title=short_title,
+            entries='\n\n'.join(entries), updated=updated,
+            id=xml(base_id)).encode('utf-8')
+
+    return metadata, atom
+
diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py
index 49da18ea7b..4159c6dd40 100644
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@@ -42,11 +42,10 @@ class MOBIOutput(OutputFormatPlugin):
     ])
 
     def check_for_periodical(self):
-        if self.oeb.metadata.publication_type and \
-            unicode(self.oeb.metadata.publication_type[0]).startswith('periodical:'):
-                self.periodicalize_toc()
-                self.check_for_masthead()
-                self.opts.mobi_periodical = True
+        if self.is_periodical:
+            self.periodicalize_toc()
+            self.check_for_masthead()
+            self.opts.mobi_periodical = True
         else:
             self.opts.mobi_periodical = False
 
diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py
index f3d77061c3..f710b52204 100644
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@@ -1102,6 +1102,7 @@ class BasicNewsRecipe(Recipe):
         if self.output_profile.periodical_date_in_title:
             title += strftime(self.timefmt)
         mi = MetaInformation(title, [__appname__])
+        mi.title_sort = self.short_title()
         mi.publisher = __appname__
         mi.author_sort = __appname__
         mi.publication_type = 'periodical:'+self.publication_type