diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe
new file mode 100644
index 0000000000..c2576191dd
--- /dev/null
+++ b/recipes/antyweb.recipe
@@ -0,0 +1,54 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AntywebRecipe(BasicNewsRecipe):
+    '''Calibre news recipe for the Antyweb blog feed.'''
+    encoding = 'utf-8'
+    __license__ = 'GPL v3'
+    __author__ = u'Artur Stachecki '
+    language = 'pl'
+    version = 1
+    title = u'Antyweb'
+    category = u'News'
+    description = u'Blog o internecie i nowych technologiach'
+    cover_url=''
+    remove_empty_feeds= True
+    auto_cleanup = False
+    no_stylesheets=True
+    use_embedded_content = False
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript = True
+    simultaneous_downloads = 3
+
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'}))
+    keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'}))
+
+
+    remove_tags =[]
+    remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'}))
+    remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'}))
+    # The value below is a CSS declaration list, i.e. the content of a style
+    # attribute; it was keyed on 'class', which would not hold such a value.
+    remove_tags.append(dict(name = 'div', attrs = {'style' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}))
+    remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'}))
+
+
+    extra_css = '''
+    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+    '''
+
+    feeds = [
+        # http:// instead of feed://, which is a feed-reader pseudo-scheme.
+        (u'Artykuly', u'http://feeds.feedburner.com/Antyweb?format=xml'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Replace every <a> tag that has a single string child with that text.'''
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
diff --git a/recipes/bankier_pl.recipe b/recipes/bankier_pl.recipe
new file mode 100644
index 0000000000..8a68d844b3
--- /dev/null
+++ b/recipes/bankier_pl.recipe
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = 'teepel '
+
+'''
+bankier.pl
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class bankier(BasicNewsRecipe):
+    '''Calibre news recipe for the Bankier.pl feeds, using print pages.'''
+    title = u'Bankier.pl'
+    __author__ = 'teepel '
+    language = 'pl'
+    description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
+    masthead_url='http://www.bankier.pl/gfx/hd-mid-02.gif'
+    INDEX='http://bankier.pl/'
+    remove_empty_feeds= True
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets=True
+    simultaneous_downloads = 5
+
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'div', attrs = {'align' : 'left'}))
+
+    remove_tags =[]
+    remove_tags.append(dict(name = 'table', attrs = {'cellspacing' : '2'}))
+    remove_tags.append(dict(name = 'div', attrs = {'align' : 'center'}))
+    remove_tags.append(dict(name = 'img', attrs = {'src' : '/gfx/hd-mid-02.gif'}))
+    #remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'}))
+    #remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'}))
+
+    feeds = [
+        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
+        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
+        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
+        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
+        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
+        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
+    ]
+
+    def print_version(self, url):
+        '''Return the print-view URL for the article at `url`.
+
+        The article id is taken as the text between the last '-' and the
+        last '.' of `url`. Cutting from the right keeps the id correct when
+        the host has no 'www.' prefix or the title slug contains dots.
+        '''
+        article_id = url.rpartition('.')[0].rpartition('-')[2]
+        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
+
diff --git a/recipes/f1_ultra.recipe b/recipes/f1_ultra.recipe
new file mode 100644
index 0000000000..ada82542fc
--- /dev/null
+++ b/recipes/f1_ultra.recipe
@@ -0,0 +1,40 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class f1ultra(BasicNewsRecipe):
+    '''Calibre news recipe for the F1 Ultra RSS feed.'''
+    title = u'Formuła 1 - F1 ultra'
+    __license__ = 'GPL v3'
+    __author__ = 'MrStefan , Artur Stachecki '
+    language = 'pl'
+    description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
+    masthead_url='http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'
+    remove_empty_feeds= True
+    oldest_article = 1
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets=True
+
+    keep_only_tags =[(dict(name = 'div', attrs = {'id' : 'main'}))]
+    remove_tags_after =[dict(attrs = {'style' : 'margin-top:5px;margin-bottom:5px;display: inline;'})]
+    remove_tags =[(dict(attrs = {'class' : ['buttonheading', 'avPlayerContainer', 'createdate']}))]
+    remove_tags.append(dict(attrs = {'title' : ['PDF', 'Drukuj', 'Email']}))
+    remove_tags.append(dict(name = 'form', attrs = {'method' : 'post'}))
+    remove_tags.append(dict(name = 'hr', attrs = {'size' : '2'}))
+
+    # width: the old pattern r'width=\"*\"' only matched runs of quote
+    # characters (e.g. width=""), never a real value such as width="120".
+    # NOTE(review): the last pattern was garbled to r'\' (not valid Python) in
+    # this patch; r'\<table .*?\>' is a reconstruction - confirm upstream.
+    preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''),
+                          (re.compile(r'align="right"'), lambda match: ''),
+                          (re.compile(r'width="[^"]*"'), lambda match: ''),
+                          (re.compile(r'\<table .*?\>'), lambda match: '')]
+
+
+    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
+    img { display: block; clear: both;}
+    '''
+    remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align']
+
+    feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')]
diff --git a/recipes/gazeta_pl_krakow.recipe b/recipes/gazeta_pl_krakow.recipe
index 0f35e536f6..59188a5d6a 100644
--- a/recipes/gazeta_pl_krakow.recipe
+++ b/recipes/gazeta_pl_krakow.recipe
@@ -8,7 +8,6 @@ krakow.gazeta.pl
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
 
 class gw_krakow(BasicNewsRecipe):
     title = u'Gazeta.pl Kraków'
@@ -46,7 +45,7 @@ class gw_krakow(BasicNewsRecipe):
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'}))
 
     remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})]
-    
+
     feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')]
 
     def skip_ad_pages(self, soup):
diff --git a/recipes/gazeta_pl_warszawa.recipe b/recipes/gazeta_pl_warszawa.recipe
index 7a43931db4..2d95bcc06f 100644
--- a/recipes/gazeta_pl_warszawa.recipe
+++ b/recipes/gazeta_pl_warszawa.recipe
@@ -8,7 +8,6 @@ warszawa.gazeta.pl
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
-import re
 
 class gw_wawa(BasicNewsRecipe):
     title = u'Gazeta.pl Warszawa'
@@ -43,7 +42,7 @@ class gw_wawa(BasicNewsRecipe):
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'}))
     remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'}))
     remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'}))
-    
+
     feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')]
 
     def skip_ad_pages(self, soup):
diff --git a/recipes/icons/antyweb.png b/recipes/icons/antyweb.png
new file mode 100644
index 0000000000..8ca9870f60
Binary files /dev/null and b/recipes/icons/antyweb.png differ
diff --git a/recipes/icons/bankier_pl.png b/recipes/icons/bankier_pl.png
new file mode 100644
index 0000000000..c26f006a57
Binary files /dev/null and b/recipes/icons/bankier_pl.png differ
diff --git a/recipes/icons/f1_ultra.png b/recipes/icons/f1_ultra.png
new file mode 100644
index 0000000000..f45a94f53a
Binary files /dev/null and b/recipes/icons/f1_ultra.png differ
diff --git a/recipes/icons/myapple_pl.png b/recipes/icons/myapple_pl.png
new file mode 100644
index 0000000000..a68cf4e7ef
Binary files /dev/null and b/recipes/icons/myapple_pl.png differ
diff --git a/recipes/icons/telepolis_pl.png b/recipes/icons/telepolis_pl.png
new file mode 100644
index 0000000000..0b94658d94
Binary files /dev/null and b/recipes/icons/telepolis_pl.png differ
diff --git a/recipes/myapple_pl.recipe b/recipes/myapple_pl.recipe
new file mode 100644
index 0000000000..df5708a325
--- /dev/null
+++ b/recipes/myapple_pl.recipe
@@ -0,0 +1,52 @@
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MyAppleRecipe(BasicNewsRecipe):
+    '''Calibre news recipe for the MyApple.pl news feed.'''
+    __license__ = 'GPL v3'
+    __author__ = u'Artur Stachecki '
+    language = 'pl'
+    version = 1
+
+    title = u'MyApple.pl'
+    category = u'News'
+    description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
+    cover_url=''
+    remove_empty_feeds= True
+    no_stylesheets=True
+    oldest_article = 7
+    max_articles_per_feed = 100000
+    recursions = 0
+
+    remove_javascript = True
+    simultaneous_downloads = 3
+
+    keep_only_tags =[]
+    keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article_content'}))
+
+    remove_tags =[]
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'fullwidth'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'cmslinks'}))
+    remove_tags.append(dict(name = 'div', attrs = {'class' : 'googleads-468'}))
+    remove_tags.append(dict(name = 'div', attrs = {'id' : 'comments'}))
+
+
+    extra_css = '''
+    body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
+    td.contentheading{font-size: large; font-weight: bold;}
+    '''
+
+    feeds = [
+        # http:// rather than the feed:// pseudo-scheme, and '&sectionid'
+        # restored (it had been mangled into the section sign, U+00A7).
+        ('News', 'http://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Replace every <a> tag that has a single string child with that text.'''
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+                tstr = alink.string
+                alink.replaceWith(tstr)
+        return soup
diff --git a/recipes/telepolis_pl.recipe b/recipes/telepolis_pl.recipe
new file mode 100644
index 0000000000..ff4803697f
--- /dev/null
+++ b/recipes/telepolis_pl.recipe
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+
+class telepolis(BasicNewsRecipe):
+    '''Calibre news recipe for the Telepolis.pl news and articles feeds.'''
+    title = u'Telepolis.pl'
+    __author__ = 'Artur Stachecki '
+    language = 'pl'
+    # Adjacent literals instead of backslash continuations inside the string,
+    # which would embed any indentation of the continuation lines in the text.
+    description = (u'Twój telekomunikacyjny serwis informacyjny. '
+                   u'Codzienne informacje, testy i artykuły, '
+                   u'promocje, baza telefonów oraz centrum rozrywki')
+    oldest_article = 7
+    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
+    max_articles_per_feed = 100
+    simultaneous_downloads = 5
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    remove_tags = []
+    remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'}))
+
+    # NOTE(review): r'Zobacz:.*?' and r'' below look truncated (markup seems
+    # to have been stripped from this patch); as written the first matches
+    # only 'Zobacz:' and the second only empty strings - confirm upstream.
+    preprocess_regexps = [(re.compile(r'<: .*? :>'),
+                           lambda match: ''),
+                          (re.compile(r'Zobacz:.*?', re.DOTALL),
+                           lambda match: ''),
+                          (re.compile(r'<-ankieta.*?>'),
+                           lambda match: ''),
+                          (re.compile(r'\(Q\!\)'),
+                           lambda match: ''),
+                          (re.compile(r'\(plik.*?\)'),
+                           lambda match: ''),
+                          (re.compile(r'', re.DOTALL),
+                           lambda match: '')
+                          ]
+
+    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''
+
+    feeds = [
+        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
+        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
+    ]
+
+    def print_version(self, url):
+        '''Map a news.php / artykuly.php URL to its *_print.php variant.'''
+        if 'news.php' in url:
+            print_url = url.replace('news.php', 'news_print.php')
+        else:
+            print_url = url.replace('artykuly.php', 'art_print.php')
+        return print_url
+
+    def preprocess_html(self, soup):
+        '''Use full-size images; drop the first table row and print/copyright rows.'''
+        for image in soup.findAll('img'):
+            # .get(): an <img> without a src attribute must not raise.
+            src = image.get('src', '')
+            if 'm.jpg' in src:
+                image['src'] = src.replace('m.jpg', '.jpg')
+        # find() returns None when the page has no <tr>; nothing to drop then.
+        logo = soup.find('tr')
+        if logo is not None:
+            logo.extract()
+        for tag in soup.findAll('tr'):
+            text = self.tag_to_string(tag)
+            # u'' literals so the non-ASCII marker is compared as text.
+            if u'Wiadomość wydrukowana' in text or u'copyright' in text:
+                tag.extract()
+        return self.adeify_images(soup)