calibre/recipes/focus_pl.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

52 lines
1.6 KiB
Plaintext

from calibre.web.feeds.recipes import BasicNewsRecipe
class NYTimes(BasicNewsRecipe):
    """Recipe that builds a single-section feed from the Focus front page.

    The class name does not match the publication; it is kept unchanged so
    that anything referring to the recipe by its class name keeps working.
    """

    title = 'Focus'
    __author__ = 'Krittika Goyal'
    language = 'pl'
    description = 'Polish scientific monthly magazine'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = False
    no_stylesheets = True

    # Tag specifications describing which parts of an article page to keep
    # and which to drop.
    keep_only_tags = dict(name='article', attrs={'class': 'content'})
    remove_tags_after = dict(name='div', attrs={'class': 'inner_article'})
    remove_tags = [
        dict(name='div', attrs={'class': ['social_btns']}),
    ]

    # Site root: fetched as the index page and used as the base against
    # which article links are resolved.
    _FOCUS_BASE_URL = 'http://www.focus.pl/'

    def nejm_get_index(self):
        """Fetch the Focus front page and return it as parsed soup."""
        return self.index_to_soup(self._FOCUS_BASE_URL)

    def parse_index(self):
        """Collect the articles linked from ``<h1>`` headings on the front page.

        Headings are looked up inside ``<div id="wrapper">``; when that
        element is absent the whole page is scanned instead.

        Returns:
            A one-element list ``[(section_title, articles)]``. Each article
            is a dict with the keys ``title``, ``url``, ``description`` and
            ``date`` (the last two are always empty strings).
        """
        # Imported locally so this block does not depend on edits to the
        # file's import section; the fallback covers Python 2 interpreters.
        try:
            from urllib.parse import urljoin
        except ImportError:
            from urlparse import urljoin

        soup = self.nejm_get_index()
        toc = soup.find('div', id='wrapper')
        if toc is None:
            # Without this guard a missing wrapper would surface as an
            # AttributeError on None; scanning the whole page is the
            # closest best-effort equivalent.
            self.log('\t\tNo div#wrapper found, scanning the whole page')
            toc = soup

        section_title = 'Focus Articles'
        articles = []
        for heading in toc.findAll('h1'):
            link = heading.find('a')
            if link is None:
                continue
            title = self.tag_to_string(link)
            url = link.get('href')
            if not url or not title:
                continue
            # urljoin resolves site-relative hrefs against the site root and
            # leaves already-absolute hrefs intact, unlike plain
            # concatenation which would corrupt the latter.
            url = urljoin(self._FOCUS_BASE_URL, url)
            self.log('\t\tFound article:', title)
            self.log('\t\t\t', url)
            articles.append({
                'title': title,
                'url': url,
                'description': '',
                'date': '',
            })

        return [(section_title, articles)]