calibre/recipes/paperli_topic.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

54 lines
1.9 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
paperli
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime
class paperli_topics(BasicNewsRecipe):
    '''
    Recipe that turns a paper.li tag page into one feed per topic.

    The tag index page is scanned for topic links, then each topic page is
    scanned for its top stories.
    '''

    # Customize this recipe by changing paperli_tag to download news on your
    # favorite tag. The title, description, masthead and index URL below are
    # all derived from it.
    paperli_tag = 'climate'
    title = u'The #' + paperli_tag + u' Daily - paperli'
    __author__ = 'Hiroshi Miura'
    oldest_article = 7
    max_articles_per_feed = 100
    description = 'paper.li page about ' + paperli_tag
    publisher = 'paper.li'
    category = 'paper.li'
    language = 'en'
    encoding = 'utf-8'
    remove_javascript = True
    masthead_title = u'The ' + paperli_tag + ' Daily'
    timefmt = '[%y/%m/%d]'
    base_url = 'http://paper.li'
    index = base_url + '/tag/' + paperli_tag

    def parse_index(self):
        '''
        Build the feed list for this tag.

        Returns a list of ``(topic title, articles)`` tuples, where
        ``articles`` is a list of dicts with the keys ``title``, ``date``,
        ``url`` and ``description``. The list is empty when the index page
        has no topic navigation block.
        '''
        # Collect every topic first, then fetch the topic pages one by one.
        topics = self._find_topics()
        return [
            (topic['title'], self._find_articles(topic['url']))
            for topic in topics
        ]

    def _find_topics(self):
        '''Return ``{'title', 'url'}`` dicts for the topics on the index page.'''
        soup = self.index_to_soup(self.index)
        nav = soup.find('div', attrs={'class': 'paper-nav-bottom'})
        if nav is None:
            # The navigation block is missing; there is nothing to iterate.
            return []
        topics = []
        for item in nav.findAll('li', attrs={'class': ""}):
            link = item.find('a', href=True)
            if link is None:
                # A list entry without a link cannot be followed; skip it.
                continue
            topics.append({'title': link.string, 'url': link['href']})
        return topics

    def _find_articles(self, topic_url):
        '''
        Return the article dicts found on one topic page.

        ``topic_url`` is the href taken from the index page; it is appended
        to ``base_url`` to form the address that is fetched.
        '''
        soup = self.index_to_soup(self.base_url + topic_url)
        # Every article on the page gets the same formatted date stamp.
        date = strftime(self.timefmt)
        articles = []
        for story in soup.findAll('div', attrs={'class': 'yui-u'}):
            link = story.find('a', href=True, attrs={'class': 'ts'})
            if link is None:
                continue
            summary = story.find('div', text=True, attrs={'class': 'text'})
            articles.append({
                'title': link.string,
                'date': date,
                'url': link['href'],
                # A story without a summary block still gets listed, with an
                # empty description instead of aborting the whole download.
                'description': summary.string if summary is not None else '',
            })
        return articles