mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
52 lines
1.6 KiB
Plaintext
52 lines
1.6 KiB
Plaintext
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
class NYTimes(BasicNewsRecipe):
    """Recipe that collects front-page articles from focus.pl into one section.

    NOTE(review): the class name and ``nejm_get_index`` look like leftovers
    from other recipes; they are kept unchanged so anything referring to them
    keeps working.
    """

    title = 'Focus'
    __author__ = 'Krittika Goyal'
    language = 'pl'
    description = 'Polish scientific monthly magazine'
    timefmt = ' [%d %b, %Y]'
    needs_subscription = False

    no_stylesheets = True
    # Tag specs applied to each downloaded article page. ``keep_only_tags`` is
    # documented as a list of specs, so the single spec is wrapped in a list.
    keep_only_tags = [dict(name='article', attrs={'class': 'content'})]
    remove_tags_after = dict(name='div', attrs={'class': 'inner_article'})
    remove_tags = [
        dict(name='div', attrs={'class': ['social_btns']}),
    ]

    def nejm_get_index(self):
        """Return the parsed focus.pl front page used as the article index."""
        return self.index_to_soup('http://www.focus.pl/')

    def parse_index(self):
        """Build the feed list from the ``<h1>`` headline links on the index.

        Every ``<h1>`` that contains a link with both text and an ``href``
        becomes one article entry.

        Returns:
            A one-element list ``[('Focus Articles', articles)]`` where each
            article is a dict with ``title``, ``url``, ``description`` and
            ``date`` keys (the last two are always empty strings).
        """
        # Local import keeps this method self-contained.
        from urllib.parse import urljoin

        soup = self.nejm_get_index()

        toc = soup.find('div', id='wrapper')
        if toc is None:
            # The wrapper div is missing (e.g. the site layout changed).
            # Scan the whole page instead of failing with an AttributeError.
            self.log('\t\tNo div#wrapper on index page, scanning whole page')
            toc = soup

        section_title = 'Focus Articles'
        articles = []
        for heading in toc.findAll('h1'):
            a = heading.find('a')
            if a is None:
                continue
            title = self.tag_to_string(a)
            url = a.get('href', False)
            if not url or not title:
                continue
            # urljoin resolves site-relative hrefs against the site root and
            # leaves already-absolute hrefs untouched; plain concatenation
            # would corrupt the latter.
            url = urljoin('http://www.focus.pl/', url)
            self.log('\t\tFound article:', title)
            self.log('\t\t\t', url)
            articles.append({'title': title, 'url': url,
                             'description': '', 'date': ''})

        return [(section_title, articles)]
|