mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
broken feeds of a paywalled site
This commit is contained in:
parent
cbef5c2c9a
commit
947dc24f13
Binary file not shown.
Before Width: | Height: | Size: 137 B |
@ -1,48 +0,0 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
|
||||
class PB_PL(BasicNewsRecipe):
    """Recipe for the Polish business newspaper "Puls Biznesu" (pb.pl).

    Articles are collected from the site's Atom feeds, rewritten to their
    print-friendly URLs where an article id can be found, and the cover is
    looked up on the newspaper's archive page.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Puls Biznesu'
    __author__ = 'fenuks'
    language = 'pl'
    description = u'Puls Biznesu - biznes, ekonomia, giełda, inwestycje'
    category = u'newspaper'
    publication_type = u'newspaper'
    encoding = 'utf-8'
    # masthead_url = 'http://www.pb.pl/img/pb.png'

    # --- Download / clean-up options ------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}
    # Everything following the article body container is discarded.
    remove_tags_after = dict(name='div', attrs={'class': 'news_content'})

    # (feed title, Atom feed URL) pairs, one per section of the site.
    feeds = [
        (u'Wszystkie', u'http://www.pb.pl/atom'),
        (u'Puls inwestora', u'http://pulsinwestora.pb.pl/atom'),
        (u'Puls Firmy', u'http://firma.pb.pl/atom'),
        (u'PB Weekend', u'http://weekend.pb.pl/atom'),
        # NOTE(review): title reads "MPS" while the host is "forummsp" -
        # possibly a typo for "MSP"; left unchanged, confirm before editing.
        (u'Forum MPS', u'http://forummsp.pb.pl/atom'),
        (u'Moto', u'http://moto.pb.pl/atom'),
        (u'Kariera i praca', u'http://kariera.pb.pl/atom'),

        (u'Nieruchomości', u'http://nieruchomosci.pb.pl/atom'),
        (u'Samorządy', u'http://samorzady.pb.pl/atom'),
        (u'Tech', u'http://tech.pb.pl/atom'),
        (u'Energetyka', u'http://energetyka.pb.pl/atom'),
        (u'Retailing', u'http://retailing.pb.pl/atom'),
        (u'Puls medycyny', u'http://pulsmedycyny.pl/atom'),
        (u'Logistyka', u'http://logistyka.pb.pl/atom'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL for an article.

        The first ``<digits>,<digits>`` pair found in *url* is used as the
        article id and appended to the ``actionprint`` endpoint. When no
        such pair is present, *url* is returned unchanged.
        """
        article_id = re.search(r'(?P<id>\d+,\d+)', url)
        if article_id is None:
            return url
        return 'http://www.pb.pl/actionprint/' + article_id.group('id')

    def get_cover_url(self):
        """Return the URL of the current cover image, if one can be found.

        The archive page is fetched and searched for an ``img`` element with
        class ``cover_picture``. When it is found (and carries a ``src``),
        ``self.cover_url`` is updated. When it is missing, the previously
        set ``cover_url`` (``None`` if the attribute is absent) is returned
        instead of failing on a ``None`` lookup.
        """
        soup = self.index_to_soup('http://archiwum.pb.pl/')
        cover = soup.find(name='img', attrs={'class': 'cover_picture'})
        # The page layout may change or the image may be absent; only
        # overwrite the stored cover URL when a usable one was found.
        if cover is not None and cover.get('src'):
            self.cover_url = cover['src']
        return getattr(self, 'cover_url', None)
|
Loading…
x
Reference in New Issue
Block a user