diff --git a/recipes/icons/puls_biznesu.png b/recipes/icons/puls_biznesu.png deleted file mode 100644 index 0890c8750f..0000000000 Binary files a/recipes/icons/puls_biznesu.png and /dev/null differ diff --git a/recipes/icons/racjonalista_pl.png b/recipes/icons/racjonalista_pl.png deleted file mode 100644 index d8e42d6e11..0000000000 Binary files a/recipes/icons/racjonalista_pl.png and /dev/null differ diff --git a/recipes/icons/res_publica.png b/recipes/icons/res_publica.png deleted file mode 100644 index 133cffa695..0000000000 Binary files a/recipes/icons/res_publica.png and /dev/null differ diff --git a/recipes/icons/rynek_kolejowy.png b/recipes/icons/rynek_kolejowy.png deleted file mode 100644 index 5a7a7f57dd..0000000000 Binary files a/recipes/icons/rynek_kolejowy.png and /dev/null differ diff --git a/recipes/puls_biznesu.recipe b/recipes/puls_biznesu.recipe deleted file mode 100644 index 87501ce49d..0000000000 --- a/recipes/puls_biznesu.recipe +++ /dev/null @@ -1,48 +0,0 @@ -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class PB_PL(BasicNewsRecipe): - title = u'Puls Biznesu' - __author__ = 'fenuks' - language = 'pl' - description = u'Puls Biznesu - biznes, ekonomia, giełda, inwestycje' - category = u'newspaper' - publication_type = u'newspaper' - encoding = 'utf-8' - # masthead_url = 'http://www.pb.pl/img/pb.png' - oldest_article = 7 - max_articles_per_feed = 100 - no_stylesheets = True - remove_empty_feeds = True - ignore_duplicate_articles = {'title', 'url'} - remove_tags_after = dict(name='div', attrs={'class': 'news_content'}) - feeds = [ - (u'Wszystkie', u'http://www.pb.pl/atom'), - (u'Puls inwestora', u'http://pulsinwestora.pb.pl/atom'), - (u'Puls Firmy', u'http://firma.pb.pl/atom'), - (u'PB Weekend', u'http://weekend.pb.pl/atom'), - (u'Forum MPS', u'http://forummsp.pb.pl/atom'), - (u'Moto', u'http://moto.pb.pl/atom'), - (u'Kariera i praca', u'http://kariera.pb.pl/atom'), - - (u'Nieruchomości', u'http://nieruchomosci.pb.pl/atom'), - (u'Samorządy', u'http://samorzady.pb.pl/atom'), - (u'Tech', u'http://tech.pb.pl/atom'), - (u'Energetyka', u'http://energetyka.pb.pl/atom'), - (u'Retailing', u'http://retailing.pb.pl/atom'), - (u'Puls medycyny', u'http://pulsmedycyny.pl/atom'), - (u'Logistyka', u'http://logistyka.pb.pl/atom')] - - def print_version(self, url): - article_id = re.search(r'(?P\d+,\d+)', url) - if article_id: - return 'http://www.pb.pl/actionprint/' + article_id.group('id') - else: - return url - - def get_cover_url(self): - soup = self.index_to_soup('http://archiwum.pb.pl/') - cover = soup.find(name='img', attrs={'class': 'cover_picture'}) - self.cover_url = cover['src'] - return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/pure_pc.recipe b/recipes/pure_pc.recipe index 952285c017..ae599c4759 100644 --- a/recipes/pure_pc.recipe +++ b/recipes/pure_pc.recipe @@ -11,35 +11,10 @@ class PurePC(BasicNewsRecipe): description = u'Artykuły, aktualności, sprzęt, forum, chłodzenie, modding, urządzenia mobilne - wszystko w jednym miejscu.' category = 'IT' language = 'pl' - masthead_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg' cover_url = 'http://www.purepc.pl/themes/new/images/purepc.jpg' extra_css = '.wykres_logo {float: left; margin-right: 5px;}' no_stylesheets = True - keep_only_tags = [dict(id='content')] - remove_tags_after = dict(attrs={'class': 'fivestar-widget'}) - remove_tags = [dict(id='navigator'), dict( - attrs={'class': ['box-tools', 'fivestar-widget', 'PageMenuList']})] + + keep_only_tags = [dict(name='div', attrs={'class':'node page0'})] + remove_tags = [dict(name='div', attrs={'class':'article-options'})] feeds = [(u'Wiadomo\u015bci', u'http://www.purepc.pl/node/feed')] - - def append_page(self, soup, appendtag): - lasturl = appendtag.find(attrs={'class': 'pager-last'}) - if lasturl: - regex = re.search('(.+?2C)(\d+)', lasturl.a['href']) - baseurl = regex.group(1).replace('?page=0%2C', '?page=1%2C') - baseurl = 'http://www.purepc.pl' + baseurl - nr = int(regex.group(2)) - for page_nr in range(1, nr + 1): - soup2 = self.index_to_soup(baseurl + str(page_nr)) - pagetext = soup2.find(attrs={'class': 'article'}) - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'class': ['PageMenuList', 'pager', 'fivestar-widget']}): - r.extract() - comments = appendtag.findAll( - text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup diff --git a/recipes/racjonalista_pl.recipe b/recipes/racjonalista_pl.recipe deleted file mode 100644 index 16a98f2b0c..0000000000 --- a/recipes/racjonalista_pl.recipe +++ /dev/null @@ -1,58 +0,0 @@ -__copyright__ = '2012, Micha\u0142 ' -''' -Racjonalista.pl -''' - -from calibre.web.feeds.news import BasicNewsRecipe -import re - - -class Racjonalista(BasicNewsRecipe): - __author__ = u'Micha\u0142 ' - publisher = u'Fundacja Wolnej My\u015bli' - title = u'Racjonalista.pl' - description = u'Racjonalista.pl' - category = 'newspaper' - language = 'pl' - encoding = 'iso-8859-2' - oldest_article = 7 - max_articles_per_feed = 20 - remove_javascript = True - no_stylesheets = True - use_embedded_content = False - simultaneous_downloads = 2 - timeout = 30 - cover_url = 'http://www.racjonalista.pl/img/uimg/rac.gif' - - feeds = [(u'Racjonalista.pl', u'http://www.racjonalista.pl/rss.php')] - - match_regexps = [r'kk\.php'] - - def print_version(self, url): - return url.replace('/s,', '/t,') - - extra_css = 'h2 {font: serif large} .cytat {text-align: right}' - - remove_attributes = ['target', 'width', 'height'] - - preprocess_regexps = [ - (re.compile(i[0], re.DOTALL), i[1]) for i in - [(r']*> 

', lambda match: ''), - (r' ', lambda match: ' '), - (r']+>', lambda match: ''), - (r']+>', lambda match: ''), - (r'', lambda match: ''), - (r'[^<]+)', - lambda match: '' + match.group('a') + ''), - (r'
(?P[^<]+)
', - lambda match: '

' + match.group('t') + '

'), - (r'
', lambda match: ''), # noqa - (r'
', lambda match: ''), - (r'
', lambda match: ''), - (r']+>(?P

[^<]+)', - lambda match: '' + match.group('p') + ''), - (r']+>(?P[^<]+)', lambda match: match.group('a')), - (r'Orygin[^<]+', - lambda match: ''), - (r'Poka[^<]+', lambda match: '')] - ] diff --git a/recipes/res_publica.recipe b/recipes/res_publica.recipe deleted file mode 100644 index 14341b4aa8..0000000000 --- a/recipes/res_publica.recipe +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class ResPublicaNowaRecipe(BasicNewsRecipe): - __license__ = 'GPL v3' - __author__ = u'Artur Stachecki ' - language = 'pl' - version = 1 - - title = u'Res Publica Nowa' - category = u'News' - description = u'Portal kulturalno-społecznego kwartalnika o profilu liberalnym, wydawany przez Fundację Res Publica' - cover_url = '' - remove_empty_feeds = True - no_stylesheets = True - oldest_article = 7 - max_articles_per_feed = 100000 - recursions = 0 - no_stylesheets = True - remove_javascript = True - simultaneous_downloads = 5 - - feeds = [ - ('Artykuly', 'feed://publica.pl/feed'), - ] - - def preprocess_html(self, soup): - for alink in soup.findAll('a'): - if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup diff --git a/recipes/rmf24_ESKN.recipe b/recipes/rmf24_ESKN.recipe index ace2cf84c8..281f3a4c65 100644 --- a/recipes/rmf24_ESKN.recipe +++ b/recipes/rmf24_ESKN.recipe @@ -26,13 +26,11 @@ class RMF24_ESKN(BasicNewsRecipe): (u'Nauka', u'http://www.rmf24.pl/nauka/feed')] keep_only_tags = [ - dict(name='div', attrs={'class': 'box articleSingle print'})] + dict(name='header', attrs={'class': 'article-header'}), + dict(name='div', attrs={'class': 'article-container'})] - remove_tags = [ - dict(name='div', attrs={'class': 'toTop'}), - dict(name='div', attrs={'class': 'category'}), - dict(name='div', attrs={'class': 'REMOVE'}), - dict(name='div', attrs={'class': 'embed embedAd'})] + remove_tags = [dict(name='div', attrs={'id': 'ReklamaMobile'}), + dict(name='img', attrs={'class': 'img-responsive hidden-lg hidden-md hidden-sm'})] extra_css = ''' h1 { font-size: 1.2em; } @@ -42,7 +40,6 @@ class RMF24_ESKN(BasicNewsRecipe): (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in [ (r'

Zdj.cie

', lambda match: ''), - (r'embed embed(Left|Right|Center) articleEmbed(Audio|Wideo articleEmbedVideo|ArticleFull|ArticleTitle|ArticleListTitle|AlbumHorizontal)">', lambda match: 'REMOVE">'), # noqa (r'', lambda match: 'REMOVE">'), # noqa (r'