mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
broken feeds of a paywalled site
This commit is contained in:
parent
cbef5c2c9a
commit
947dc24f13
Binary file not shown.
Before Width: | Height: | Size: 137 B |
@ -1,48 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class PB_PL(BasicNewsRecipe):
    """Calibre news recipe for the Polish business daily Puls Biznesu (pb.pl).

    Articles are collected from the Atom feeds listed in ``feeds`` and are
    fetched through the site's print view when an article id can be found
    in the article URL.
    """

    title = u'Puls Biznesu'
    __author__ = 'fenuks'
    language = 'pl'
    description = u'Puls Biznesu - biznes, ekonomia, giełda, inwestycje'
    category = u'newspaper'
    publication_type = u'newspaper'
    encoding = 'utf-8'
    # masthead_url = 'http://www.pb.pl/img/pb.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    # Everything following the article body container is dropped.
    remove_tags_after = dict(name='div', attrs={'class': 'news_content'})

    # (section title, Atom feed URL) pairs.
    feeds = [
        (u'Wszystkie', u'http://www.pb.pl/atom'),
        (u'Puls inwestora', u'http://pulsinwestora.pb.pl/atom'),
        (u'Puls Firmy', u'http://firma.pb.pl/atom'),
        (u'PB Weekend', u'http://weekend.pb.pl/atom'),
        (u'Forum MPS', u'http://forummsp.pb.pl/atom'),
        (u'Moto', u'http://moto.pb.pl/atom'),
        (u'Kariera i praca', u'http://kariera.pb.pl/atom'),

        (u'Nieruchomości', u'http://nieruchomosci.pb.pl/atom'),
        (u'Samorządy', u'http://samorzady.pb.pl/atom'),
        (u'Tech', u'http://tech.pb.pl/atom'),
        (u'Energetyka', u'http://energetyka.pb.pl/atom'),
        (u'Retailing', u'http://retailing.pb.pl/atom'),
        (u'Puls medycyny', u'http://pulsmedycyny.pl/atom'),
        (u'Logistyka', u'http://logistyka.pb.pl/atom')]

    def print_version(self, url):
        """Return the print-view URL for an article.

        The article id is the first ``<digits>,<digits>`` sequence found in
        ``url``. When no such sequence is present, ``url`` is returned
        unchanged.
        """
        article_id = re.search(r'(?P<id>\d+,\d+)', url)
        if article_id is None:
            return url
        return 'http://www.pb.pl/actionprint/' + article_id.group('id')

    def get_cover_url(self):
        """Return the cover image URL taken from the archive front page.

        Looks for an ``<img class="cover_picture">`` on
        http://archiwum.pb.pl/ and stores its ``src`` in ``self.cover_url``.
        If the image (or its ``src`` attribute) is missing, ``cover_url`` is
        left as it was and that existing value (``None`` if never set) is
        returned instead of raising.
        """
        soup = self.index_to_soup('http://archiwum.pb.pl/')
        cover = soup.find(name='img', attrs={'class': 'cover_picture'})
        # find() yields None when the page layout changes or the image is
        # absent; subscripting it would raise TypeError.
        src = cover.get('src') if cover is not None else None
        if src:
            self.cover_url = src
        return getattr(self, 'cover_url', None)
|
|
Loading…
x
Reference in New Issue
Block a user