From e7dd6770e7749d30408c213ac2c3e6283f13b91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Thu, 13 Oct 2016 23:58:10 +0200 Subject: [PATCH 1/3] recipes: improve elektroda_pl --- recipes/elektroda_pl.recipe | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe index 6a27e9f15a..3c1c4f0f67 100644 --- a/recipes/elektroda_pl.recipe +++ b/recipes/elektroda_pl.recipe @@ -12,11 +12,12 @@ class Elektroda(BasicNewsRecipe): language = 'pl' max_articles_per_feed = 100 no_stylesheets = True - remove_tags_before = dict(name='span', attrs={'class': 'postbody'}) - remove_tags_after = dict(name='td', attrs={'class': 'spaceRow'}) - remove_tags = [dict(name='a', attrs={'href': '#top'})] feeds = [(u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php')] + keep_only_tags = [dict(name='div', attrs={'class': 'title-wrap pull-left'}), + dict(name='ul', attrs={'class': 'topic-lists clearfix'}) + ] + def preprocess_html(self, soup): tag = soup.find('span', attrs={'class': 'postbody'}) if tag: From 4b571f1553ca722ccb9b5d38d71bd1be63e76ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 14 Oct 2016 00:01:38 +0200 Subject: [PATCH 2/3] recipes: fix kosmonauta.net --- recipes/kosmonauta_pl.recipe | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe index 42c8f1b75a..8cc7258c1b 100644 --- a/recipes/kosmonauta_pl.recipe +++ b/recipes/kosmonauta_pl.recipe @@ -10,30 +10,11 @@ class Kosmonauta(BasicNewsRecipe): category = 'astronomy' language = 'pl' cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' - extra_css = '.thumb-left {float:left; margin-right:5px;} .calibre_navbar {clear: both;}' no_stylesheets = True - INDEX = 'http://www.kosmonauta.net' oldest_article = 7 no_stylesheets = True remove_javascript = True remove_attributes = ['style'] max_articles_per_feed = 100 - keep_only_tags = [dict(name='div', attrs={'class': 'item-page'})] - remove_tags = [dict(attrs={'class': ['article-tools clearfix', 'cedtag', 'nav clearfix', - 'jwDisqusForm']}), dict(attrs={'alt': ['Poprzednia strona', 'Następna strona']})] - remove_tags_after = dict(name='div', attrs={'class': 'cedtag'}) - feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/?format=feed&type=atom')] + feeds = [(u'Kosmonauta.net', u'http://www.kosmonauta.net/feed')] - def print_version(self, url): - return url + '?tmpl=component&print=1&layout=default&page=' - - def preprocess_html(self, soup): - for a in soup.findAll(name='a'): - if a.has_key('href'): # noqa - href = a['href'] - if not href.startswith('http'): - a['href'] = self.INDEX + href - for a in soup.findAll(name='img'): - if a.has_key('style') and 'float:' in a['style']: # noqa - a['class'] = 'thumb-left' - return soup From ba8b4fdb9a9aadd4094c5bf21bd6ad0a2af939ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Fri, 14 Oct 2016 00:30:14 +0200 Subject: [PATCH 3/3] recipes: bring gosc_niedzielny back to life --- recipes/gosc_niedzielny.recipe | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/recipes/gosc_niedzielny.recipe b/recipes/gosc_niedzielny.recipe index 8f1b283600..3657d88643 100644 --- a/recipes/gosc_niedzielny.recipe +++ b/recipes/gosc_niedzielny.recipe @@ -3,13 +3,12 @@ __license__ = 'GPL v3' __copyright__ = '2011, Piotr Kontek, piotr.kontek@gmail.com \ - 2013, Tomasz Długosz, tomek3d@gmail.com' + 2013-2016, Tomasz Długosz, tomek3d@gmail.com' from calibre.web.feeds.news import BasicNewsRecipe import re from lxml import html - class GN(BasicNewsRecipe): __author__ = 'Piotr Kontek, Tomasz Długosz' @@ -51,7 +50,7 @@ class GN(BasicNewsRecipe): return feeds def find_articles(self, main_block): - for a in main_block.findAll('div', attrs={'class': ['prev_doc2', 'sr-document']}): + for a in main_block.findAll('div', attrs={'class': ['prev_doc_n1 prev_doc_img21']}): art = a.find('a') yield { 'title': self.tag_to_string(art), @@ -81,8 +80,8 @@ class GN(BasicNewsRecipe): ] remove_tags = [ - dict(name='p', attrs={'class': ['r tr', 'l l-2', 'wykop']}), - dict(name='div', attrs={'class': ['doc_actions', 'cf', 'fr1_cl']}), + dict(name='p', attrs={'class': ['r tr', 'l l-2', 'wykop', 'l l-2 doc-source']}), + dict(name='div', attrs={'class': ['doc_actions', 'cf', 'fr1_cl','txt__social-icons','txt__tags']}), dict(name='div', attrs={'id': 'vote'}), dict(name='a', attrs={'class': 'img_enlarge'}) ]