diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe index 55858020ad..34871ea04a 100644 --- a/recipes/elektroda_pl.recipe +++ b/recipes/elektroda_pl.recipe @@ -10,6 +10,7 @@ class Elektroda(BasicNewsRecipe): category = 'electronics' language = 'pl' max_articles_per_feed = 100 + no_stylesheets= True remove_tags_before=dict(name='span', attrs={'class':'postbody'}) remove_tags_after=dict(name='td', attrs={'class':'spaceRow'}) remove_tags=[dict(name='a', attrs={'href':'#top'})] diff --git a/recipes/gameplay_pl.recipe b/recipes/gameplay_pl.recipe index 7b0ccb4f55..dc90d79ed1 100644 --- a/recipes/gameplay_pl.recipe +++ b/recipes/gameplay_pl.recipe @@ -12,8 +12,8 @@ class Gameplay_pl(BasicNewsRecipe): max_articles_per_feed = 100 remove_javascript= True no_stylesheets= True - keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})] - remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})] + keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news', 'news_container']})] + remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi', 'news_tagi']}), dict(attrs={'usemap':'#map'}), dict(name='a', attrs={'class':['pin-it-button', 'twitter-share-button']})] feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')] def image_url_processor(self, baseurl, url): diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe index 1f8147ba3d..36982788f1 100644 --- a/recipes/gram_pl.recipe +++ b/recipes/gram_pl.recipe @@ -12,13 +12,16 @@ class Gram_pl(BasicNewsRecipe): no_stylesheets= True extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' - remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 
'post-info']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])] - keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']})] + remove_tags= [dict(name='p', attrs={'class':['extraText', 'must-log-in']}), dict(attrs={'class':['el', 'headline', 'post-info', 'entry-footer clearfix']}), dict(name='div', attrs={'class':['twojaOcena', 'comment-body', 'comment-author vcard', 'comment-meta commentmetadata', 'tw_button', 'entry-comment-counter', 'snap_nopreview sharing robots-nocontent']}), dict(id=['igit_rpwt_css', 'comments', 'reply-title', 'igit_title'])] + keep_only_tags= [dict(name='div', attrs={'class':['main', 'arkh-postmetadataheader', 'arkh-postcontent', 'post', 'content', 'news_header', 'news_subheader', 'news_text']}), dict(attrs={'class':['contentheading', 'contentpaneopen']}), dict(name='article')] feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), - (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')] + (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'), + (u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'), + #(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss') + ] - def parse_feeds (self): - feeds = BasicNewsRecipe.parse_feeds(self) + def parse_feeds (self): + feeds = BasicNewsRecipe.parse_feeds(self) for feed in feeds: for article in feed.articles[:]: if 'REKLAMA SKLEP' in article.title.upper() or u'ARTYKUŁ:' in article.title.upper(): @@ -56,4 +59,4 @@ class Gram_pl(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] - return soup \ No newline at end of file + return 
soup diff --git a/recipes/historia_pl.recipe b/recipes/historia_pl.recipe index 34ca158a96..f3353fe89f 100644 --- a/recipes/historia_pl.recipe +++ b/recipes/historia_pl.recipe @@ -8,15 +8,21 @@ class Historia_org_pl(BasicNewsRecipe): category = 'history' language = 'pl' oldest_article = 8 - remove_empty_feeds=True + remove_empty_feeds= True + no_stylesheets = True + use_embedded_content = True max_articles_per_feed = 100 - feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=rss'), - (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=rss'), - (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=rss'), - (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=rss'), - (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=rss'), - (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=rss'), - (u'Rekonstykcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=rss'), - (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=rss'), - (u'Konkursy'), (u'http://www.historia.org.pl/index.php/konkursy.feed?type=rss')] + feeds = [(u'Wszystkie', u'http://www.historia.org.pl/index.php?format=feed&type=atom'), + (u'Wiadomości', u'http://www.historia.org.pl/index.php/wiadomosci.feed?type=atom'), + (u'Publikacje', u'http://www.historia.org.pl/index.php/publikacje.feed?type=atom'), + (u'Publicystyka', u'http://www.historia.org.pl/index.php/publicystyka.feed?type=atom'), + (u'Recenzje', u'http://historia.org.pl/index.php/recenzje.feed?type=atom'), + (u'Kultura i sztuka', u'http://www.historia.org.pl/index.php/kultura-i-sztuka.feed?type=atom'), + (u'Rekonstrukcje', u'http://www.historia.org.pl/index.php/rekonstrukcje.feed?type=atom'), + (u'Projekty', u'http://www.historia.org.pl/index.php/projekty.feed?type=atom'), + (u'Konkursy', u'http://www.historia.org.pl/index.php/konkursy.feed?type=atom')] + + + def 
print_version(self, url): + return url + '?tmpl=component&print=1&layout=default&page=' \ No newline at end of file diff --git a/recipes/oclab_pl.recipe b/recipes/oclab_pl.recipe index b0df89ba72..c00ec0d61c 100644 --- a/recipes/oclab_pl.recipe +++ b/recipes/oclab_pl.recipe @@ -11,7 +11,7 @@ class OCLab(BasicNewsRecipe): no_stylesheets = True keep_only_tags=[dict(id='main')] remove_tags_after= dict(attrs={'class':'single-postmetadata'}) - remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar']})] + remove_tags=[dict(attrs={'class':['single-postmetadata', 'pagebar', 'shr-bookmarks shr-bookmarks-expand shr-bookmarks-center shr-bookmarks-bg-enjoy']})] feeds = [(u'Wpisy', u'http://oclab.pl/feed/')] diff --git a/recipes/polska_times.recipe b/recipes/polska_times.recipe index 4126576fe2..85769193e5 100644 --- a/recipes/polska_times.recipe +++ b/recipes/polska_times.recipe @@ -11,70 +11,19 @@ class Polska_times(BasicNewsRecipe): max_articles_per_feed = 100 remove_emty_feeds= True no_stylesheets = True - preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'