mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
67 lines
2.7 KiB
Plaintext
67 lines
2.7 KiB
Plaintext
# -*- coding: utf-8 -*-
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class Focus_pl(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the Focus.pl RSS feeds.

    Articles are fetched through their printer-friendly ("do-druku") view,
    multi-page articles are stitched into a single document, and the cover
    is looked up on the magazine page of the site.
    """

    title = u'Focus.pl'
    oldest_article = 15
    max_articles_per_feed = 100
    __author__ = 'fenuks'
    language = 'pl'
    description = 'polish scientific monthly magazine'
    category = 'magazine'
    # Placeholder value; get_cover_url() replaces it when a cover link is found.
    cover_url = ''
    remove_empty_feeds = True
    no_stylesheets = True
    remove_tags_before = dict(name='div', attrs={'class': 'h2 h2f'})
    remove_tags_after = dict(name='div', attrs={'class': 'clear'})
    feeds = [
        (u'Wszystkie kategorie', u'http://focus.pl.feedsportal.com/c/32992/f/532692/index.rss'),
        (u'Nauka', u'http://focus.pl.feedsportal.com/c/32992/f/532693/index.rss'),
        (u'Historia', u'http://focus.pl.feedsportal.com/c/32992/f/532694/index.rss'),
        (u'Cywilizacja', u'http://focus.pl.feedsportal.com/c/32992/f/532695/index.rss'),
        (u'Sport', u'http://focus.pl.feedsportal.com/c/32992/f/532696/index.rss'),
        (u'Technika', u'http://focus.pl.feedsportal.com/c/32992/f/532697/index.rss'),
        (u'Przyroda', u'http://focus.pl.feedsportal.com/c/32992/f/532698/index.rss'),
        (u'Technologie', u'http://focus.pl.feedsportal.com/c/32992/f/532699/index.rss'),
        (u'Warto wiedzieć', u'http://focus.pl.feedsportal.com/c/32992/f/532700/index.rss'),
    ]

    # Prefix used to turn the site-relative links found in pages into full URLs.
    _base_url = 'http://www.focus.pl/'

    def skip_ad_pages(self, soup):
        """Return the raw HTML of the printer-friendly view of an article.

        The first anchor of the downloaded page is taken as the link to the
        real article and ``do-druku/1/`` is appended to reach its print view.

        :param soup: parsed HTML of the page that was downloaded.
        :return: raw HTML of the print view, or ``None`` when the page has no
            anchor carrying an ``href`` (the page is then used unchanged).
        """
        tag = soup.find(name='a')
        # An anchor without href (e.g. a named anchor) cannot be followed;
        # indexing it directly would raise KeyError.
        href = tag.get('href') if tag else None
        if not href:
            return None
        # raw=True asks index_to_soup for the downloaded markup, not a soup.
        return self.index_to_soup(href + 'do-druku/1/', raw=True)

    def append_page(self, appendtag):
        """Append the text of all follow-up pages of an article to *appendtag*.

        The first "arrows" navigation block found in *appendtag* supplies the
        link to the next page. Every further page is downloaded, its ``txt``
        div is appended to *appendtag*, and the link labelled "Następne" in
        that page's own "arrows" block is followed until none is left.

        :param appendtag: tag (normally ``<body>``) that receives the extra
            pages; ``None`` is accepted and ignored.
        """
        if appendtag is None:
            return
        tag = appendtag.find(name='div', attrs={'class': 'arrows'})
        if not tag or not tag.a:
            # Single-page article: nothing to stitch together.
            return
        first_href = tag.a.get('href')
        nexturl = self._base_url + first_href if first_href else None
        for rem in appendtag.findAll(name='div', attrs={'class': 'klik-nav'}):
            rem.extract()

        # Remember fetched URLs so a navigation link that points back to an
        # earlier page cannot trap the download in an endless loop.
        visited = set()
        while nexturl and nexturl not in visited:
            visited.add(nexturl)
            soup2 = self.index_to_soup(nexturl)
            nexturl = None
            pagetext = soup2.find(name='div', attrs={'class': 'txt'})
            if pagetext is None:
                # Unexpected page layout: keep what was collected so far.
                break
            tag = pagetext.find(name='div', attrs={'class': 'arrows'})
            if tag is not None:
                for r in tag.findAll(name='a'):
                    # .string is None for anchors wrapping markup (e.g. an image).
                    if r.string and u'Następne' in r.string and r.get('href'):
                        nexturl = self._base_url + r['href']
            for rem in pagetext.findAll(name='div', attrs={'class': 'klik-nav'}):
                rem.extract()
            pos = len(appendtag.contents)
            appendtag.insert(pos, pagetext)

    def get_cover_url(self):
        """Return the URL of the current issue's cover.

        The magazine page is downloaded and the first link inside the
        ``clr fl`` div is used. When no such link exists the previously
        stored ``cover_url`` (an empty string by default) is returned.
        """
        soup = self.index_to_soup('http://www.focus.pl/magazyn/')
        tag = soup.find(name='div', attrs={'class': 'clr fl'})
        link = tag.a if tag else None
        href = link.get('href') if link else None
        if href:
            self.cover_url = self._base_url + href
        return self.cover_url

    def preprocess_html(self, soup):
        """Merge the remaining pages of a multi-page article into *soup*.

        :param soup: parsed HTML of the first page of the article.
        :return: the same soup object, extended in place.
        """
        self.append_page(soup.body)
        return soup
|