diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe index c2576191dd..b7d3d2583c 100644 --- a/recipes/antyweb.recipe +++ b/recipes/antyweb.recipe @@ -43,6 +43,6 @@ class AntywebRecipe(BasicNewsRecipe): def preprocess_html(self, soup): for alink in soup.findAll('a'): if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) - return soup + tstr = alink.string + alink.replaceWith(tstr) + return soup diff --git a/recipes/app_funds.recipe b/recipes/app_funds.recipe index d5734fc451..b0ecbc87b3 100644 --- a/recipes/app_funds.recipe +++ b/recipes/app_funds.recipe @@ -24,4 +24,3 @@ class app_funds(BasicNewsRecipe): auto_cleanup = True feeds = [(u'blog', u'http://feeds.feedburner.com/blogspot/etVI')] - diff --git a/recipes/bankier_pl.recipe b/recipes/bankier_pl.recipe index 8a68d844b3..a9701a80fe 100644 --- a/recipes/bankier_pl.recipe +++ b/recipes/bankier_pl.recipe @@ -47,4 +47,3 @@ class bankier(BasicNewsRecipe): segments = urlPart.split('-') urlPart2 = segments[-1] return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2 - diff --git a/recipes/gazeta_pl_krakow.recipe b/recipes/gazeta_pl_krakow.recipe index 59188a5d6a..59b3b00933 100644 --- a/recipes/gazeta_pl_krakow.recipe +++ b/recipes/gazeta_pl_krakow.recipe @@ -49,8 +49,8 @@ class gw_krakow(BasicNewsRecipe): feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')] def skip_ad_pages(self, soup): - tag=soup.find(name='a', attrs={'class':'btn'}) - if tag: + tag=soup.find(name='a', attrs={'class':'btn'}) + if tag: new_soup=self.index_to_soup(tag['href'], raw=True) return new_soup @@ -95,8 +95,7 @@ class gw_krakow(BasicNewsRecipe): rem.extract() def preprocess_html(self, soup): - self.append_page(soup, soup.body) - if soup.find(id='container_gal'): - self.gallery_article(soup.body) - return soup - + self.append_page(soup, soup.body) + if soup.find(id='container_gal'): + self.gallery_article(soup.body) + return soup diff --git a/recipes/gazeta_pl_warszawa.recipe b/recipes/gazeta_pl_warszawa.recipe index 2d95bcc06f..9e10a0610c 100644 --- a/recipes/gazeta_pl_warszawa.recipe +++ b/recipes/gazeta_pl_warszawa.recipe @@ -46,8 +46,8 @@ class gw_wawa(BasicNewsRecipe): feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')] def skip_ad_pages(self, soup): - tag=soup.find(name='a', attrs={'class':'btn'}) - if tag: + tag=soup.find(name='a', attrs={'class':'btn'}) + if tag: new_soup=self.index_to_soup(tag['href'], raw=True) return new_soup @@ -92,8 +92,7 @@ class gw_wawa(BasicNewsRecipe): rem.extract() def preprocess_html(self, soup): - self.append_page(soup, soup.body) - if soup.find(id='container_gal'): - self.gallery_article(soup.body) - return soup - + self.append_page(soup, soup.body) + if soup.find(id='container_gal'): + self.gallery_article(soup.body) + return soup diff --git a/recipes/gazeta_pomorska.recipe b/recipes/gazeta_pomorska.recipe index 083f5cbeed..c611a513c8 100644 --- a/recipes/gazeta_pomorska.recipe +++ b/recipes/gazeta_pomorska.recipe @@ -47,12 +47,12 @@ class GazetaPomorska(BasicNewsRecipe): # # PomorskaRSS - wiadomosci kazdego typu, zakomentuj znakiem "#" # # przed odkomentowaniem wiadomosci wybranego typu: (u'PomorskaRSS', u'http://www.pomorska.pl/rss.xml') - + # # wiadomosci z regionu nie przypisane do okreslonego miasta: # (u'Region', u'http://www.pomorska.pl/region.xml'), - + # # wiadomosci przypisane do miast: - # (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), + # (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), # (u'Nak\u0142o', 
u'http://www.pomorska.pl/naklo.xml'), # (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'), # (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'), @@ -73,9 +73,9 @@ class GazetaPomorska(BasicNewsRecipe): # (u'\u015awiecie', u'http://www.pomorska.pl/swiecie.xml'), # (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'), # (u'\u017bnin', u'http://www.pomorska.pl/znin.xml') - + # # wiadomosci tematyczne (redundancja z region/miasta): - # (u'Sport', u'http://www.pomorska.pl/sport.xml'), + # (u'Sport', u'http://www.pomorska.pl/sport.xml'), # (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'), # (u'Auto', u'http://www.pomorska.pl/moto.xml'), # (u'Dom', u'http://www.pomorska.pl/dom.xml'), @@ -100,5 +100,3 @@ class GazetaPomorska(BasicNewsRecipe): extra_css = '''h1 { font-size: 1.4em; } h2 { font-size: 1.0em; }''' - - diff --git a/recipes/hatalska.recipe b/recipes/hatalska.recipe new file mode 100644 index 0000000000..10502a427a --- /dev/null +++ b/recipes/hatalska.recipe @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'teepel 2012' + +''' +hatalska.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class hatalska(BasicNewsRecipe): + title = u'Hatalska' + __author__ = 'teepel ' + language = 'pl' + description = u'Blog specjalistki z branży mediowo-reklamowej - Natalii Hatalskiej' + oldest_article = 7 + masthead_url='http://hatalska.com/wp-content/themes/jamel/images/logo.png' + max_articles_per_feed = 100 + simultaneous_downloads = 5 + remove_javascript=True + no_stylesheets=True + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'feedflare'})) + + feeds = [(u'Blog', u'http://feeds.feedburner.com/hatalskacom')] diff --git a/recipes/icons/eksiazki.png b/recipes/icons/eksiazki.png index 6f4f18d991..f17e877f09 100644 Binary files a/recipes/icons/eksiazki.png and b/recipes/icons/eksiazki.png differ diff --git a/recipes/icons/hatalska.png b/recipes/icons/hatalska.png new file mode 100644 index 0000000000..f6d20feb34 Binary files /dev/null and b/recipes/icons/hatalska.png differ diff --git a/recipes/icons/lifehacker_pl.png b/recipes/icons/lifehacker_pl.png new file mode 100644 index 0000000000..2019e91691 Binary files /dev/null and b/recipes/icons/lifehacker_pl.png differ diff --git a/recipes/icons/money_pl.png b/recipes/icons/money_pl.png new file mode 100644 index 0000000000..7a6958856e Binary files /dev/null and b/recipes/icons/money_pl.png differ diff --git a/recipes/icons/tvn24.png b/recipes/icons/tvn24.png index 864a6624ac..152b15af87 100644 Binary files a/recipes/icons/tvn24.png and b/recipes/icons/tvn24.png differ diff --git a/recipes/icons/wprost.png b/recipes/icons/wprost.png index f81878f2d2..d844978175 100644 Binary files a/recipes/icons/wprost.png and b/recipes/icons/wprost.png differ diff --git a/recipes/icons/wprost_rss.png b/recipes/icons/wprost_rss.png index 5ce1b5563d..d844978175 100644 Binary files a/recipes/icons/wprost_rss.png and b/recipes/icons/wprost_rss.png differ diff --git a/recipes/kp.recipe b/recipes/kp.recipe index f52fcef60b..85bf356b4d 100644 --- a/recipes/kp.recipe +++ b/recipes/kp.recipe @@ -47,6 +47,6 @@ class KrytykaPolitycznaRecipe(BasicNewsRecipe): def preprocess_html(self, soup): for alink in soup.findAll('a'): if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) + tstr = alink.string + alink.replaceWith(tstr) return soup diff --git a/recipes/lifehacker_pl.recipe b/recipes/lifehacker_pl.recipe new file mode 100644 index 
0000000000..84609ca82e --- /dev/null +++ b/recipes/lifehacker_pl.recipe @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'MrStefan' + +''' +www.lifehacking.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class lifehacking(BasicNewsRecipe): + title = u'Lifehacker Polska' + __author__ = 'MrStefan ' + language = 'pl' + description =u'Lifehacking - sposoby na zwiększanie własnej wydajności. Ułatwiaj sobie życie, wykorzystując wiedzę, metody, technologie, przydatne strony ...' + masthead_url='http://lifehacking.pl/wp-content/themes/lifehacking/images/lifehackerlogo.png' + remove_empty_feeds= True + oldest_article = 7 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + feeds = [(u'Lifehacker polska', u'http://feeds.feedburner.com/pl_lh')] diff --git a/recipes/michalkiewicz.recipe b/recipes/michalkiewicz.recipe index 7a1e424f0b..4e4aee0db6 100644 --- a/recipes/michalkiewicz.recipe +++ b/recipes/michalkiewicz.recipe @@ -8,8 +8,6 @@ michalkiewicz.pl from calibre.web.feeds.news import BasicNewsRecipe -# - class michalkiewicz(BasicNewsRecipe): title = u'Stanis\u0142aw Michalkiewicz' description = u'Strona autorska * felietony * artyku\u0142y * komentarze' @@ -23,4 +21,3 @@ class michalkiewicz(BasicNewsRecipe): remove_tags = [dict(name='ul', attrs={'class':'menu'})] feeds = [(u'Teksty', u'http://www.michalkiewicz.pl/rss.xml')] - diff --git a/recipes/money_pl.recipe b/recipes/money_pl.recipe index 475c2059ff..8171d05a07 100644 --- a/recipes/money_pl.recipe +++ b/recipes/money_pl.recipe @@ -60,7 +60,7 @@ class FocusRecipe(BasicNewsRecipe): ] def print_version(self, url): - if url.count ('money.pl.feedsportal.com'): + if url.count ('money.pl.feedsportal.com'): u = url.find('0Cartykul0C') u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:] u = u.replace('0C', '/') @@ -71,6 +71,6 @@ class FocusRecipe(BasicNewsRecipe): u = u.replace ('0B','.') u = u.replace (',0,',',-1,') u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '') - else: + else: u = url.replace('/nc/1','/do-druku/1') - return u + return u diff --git a/recipes/myapple_pl.recipe b/recipes/myapple_pl.recipe index df5708a325..6f7f37633e 100644 --- a/recipes/myapple_pl.recipe +++ b/recipes/myapple_pl.recipe @@ -44,6 +44,6 @@ class MyAppleRecipe(BasicNewsRecipe): def preprocess_html(self, soup): for alink in soup.findAll('a'): if alink.string is not None: - tstr = alink.string - alink.replaceWith(tstr) + tstr = alink.string + alink.replaceWith(tstr) return soup diff --git a/recipes/naszdziennik.recipe b/recipes/naszdziennik.recipe index 4c7b78c199..7d3f27da8b 100644 --- a/recipes/naszdziennik.recipe +++ b/recipes/naszdziennik.recipe @@ -15,7 +15,7 @@ class naszdziennik(BasicNewsRecipe): no_stylesheets = True keep_only_tags =[dict(attrs = {'id' : 'article'})] - + #definiujemy nową funkcje; musi zwracać listę feedów wraz z artykułami def parse_index(self): #adres do parsowania artykułów @@ -28,7 +28,7 @@ class naszdziennik(BasicNewsRecipe): sections = [] #deklaracja pierwszej sekcji jako pusty string section = '' - + #pętla for, która analizuje po kolei każdy tag "news-article" for item in soup.findAll(attrs = {'class' : 'news-article'}) : #w tagu "news-article szukamy pierwszego taga h4" @@ -51,11 +51,11 @@ class naszdziennik(BasicNewsRecipe): #jako tytuł użyty będzie tekst pomiędzy tagami article_title = self.tag_to_string(article_a) #a data będzie tekstem z pierwszego taga h4 znalezionego w tagu title-datetime - 
article_date = self.tag_to_string(article_title_datetime.find('h4')) + article_date = self.tag_to_string(article_title_datetime.find('h4')) #zebrane elementy dodajemy do listy zadeklarowanej w linijce 44 articles[section].append( { 'title' : article_title, 'url' : article_url, 'date' : article_date }) #po dodaniu wszystkich artykułów dodajemy sekcje do listy feedów, korzystając z list sekcji znajdujących się w słowniku for section in sections: feeds.append((section, articles[section])) #zwracamy listę feedów, której parsowaniem zajmie się calibre - return feeds \ No newline at end of file + return feeds diff --git a/recipes/rynek_kolejowy.recipe b/recipes/rynek_kolejowy.recipe index f68b33f84b..181bf5b6d3 100644 --- a/recipes/rynek_kolejowy.recipe +++ b/recipes/rynek_kolejowy.recipe @@ -37,4 +37,3 @@ class rynek_kolejowy(BasicNewsRecipe): segment = url.split('/') urlPart = segment[3] return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + urlPart - diff --git a/recipes/rzeczpospolita.recipe b/recipes/rzeczpospolita.recipe index 40cb4db3ac..d1453ab57e 100644 --- a/recipes/rzeczpospolita.recipe +++ b/recipes/rzeczpospolita.recipe @@ -70,5 +70,3 @@ class RzeczpospolitaRecipe(BasicNewsRecipe): forget, sep, index = rest.rpartition(',') return start + '/' + index + '?print=tak' - - diff --git a/recipes/samcik_blox.recipe b/recipes/samcik_blox.recipe index 69bb836c76..0db6b7e8af 100644 --- a/recipes/samcik_blox.recipe +++ b/recipes/samcik_blox.recipe @@ -22,5 +22,5 @@ class samcik(BasicNewsRecipe): remove_tags =[] remove_tags.append(dict(name = 'table', attrs = {'border' : '0'})) - + feeds = [(u'Wpisy', u'http://samcik.blox.pl/rss2')] diff --git a/recipes/satkurier.recipe b/recipes/satkurier.recipe index 382f7f8180..6c7520f47b 100644 --- a/recipes/satkurier.recipe +++ b/recipes/satkurier.recipe @@ -8,10 +8,7 @@ class SATKurier(BasicNewsRecipe): title = u'SATKurier.pl' __author__ = 'Artur Stachecki ' language = 'pl' - description = u'Największy i najstarszy serwis poświęcony\ - telewizji cyfrowej, przygotowywany przez wydawcę\ - miesięcznika SAT Kurier. Bieżące wydarzenia\ - z rynku mediów i nowych technologii.' + description = u'Serwis poświęcony telewizji cyfrowej' oldest_article = 7 masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif' max_articles_per_feed = 100 diff --git a/recipes/swiatkindle.recipe b/recipes/swiatkindle.recipe index 9847d1359e..d8e0e3f403 100644 --- a/recipes/swiatkindle.recipe +++ b/recipes/swiatkindle.recipe @@ -22,4 +22,3 @@ class swiatczytnikow(BasicNewsRecipe): remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})] preprocess_regexps = [(re.compile(u'
<h3>Czytaj dalej:</h3>
'), lambda match: '')] - diff --git a/recipes/telepolis_pl.recipe b/recipes/telepolis_pl.recipe index ff4803697f..06739fe31d 100644 --- a/recipes/telepolis_pl.recipe +++ b/recipes/telepolis_pl.recipe @@ -8,60 +8,20 @@ import re class telepolis(BasicNewsRecipe): title = u'Telepolis.pl' - __author__ = 'Artur Stachecki ' + __author__ = 'Artur Stachecki , Tomasz Długosz ' + language = 'pl' - description = u'Twój telekomunikacyjny serwis informacyjny.\ - Codzienne informacje, testy i artykuły,\ - promocje, baza telefonów oraz centrum rozrywki' - oldest_article = 7 + description = u'Twój telekomunikacyjny serwis informacyjny.' masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif' - max_articles_per_feed = 100 - simultaneous_downloads = 5 - remove_javascript = True no_stylesheets = True use_embedded_content = False - remove_tags = [] - remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'})) - - preprocess_regexps = [(re.compile(r'<: .*? :>'), - lambda match: ''), - (re.compile(r'Zobacz:.*?
', re.DOTALL), - lambda match: ''), - (re.compile(r'<-ankieta.*?>'), - lambda match: ''), - (re.compile(r'\(Q\!\)'), - lambda match: ''), - (re.compile(r'\(plik.*?\)'), - lambda match: ''), - (re.compile(r'', re.DOTALL), - lambda match: '') - ] - - extra_css = '''.tb { font-weight: bold; font-size: 20px;}''' - feeds = [ - (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'), - (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php') + (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#, + #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php') ] - def print_version(self, url): - if 'news.php' in url: - print_url = url.replace('news.php', 'news_print.php') - else: - print_url = url.replace('artykuly.php', 'art_print.php') - return print_url - - def preprocess_html(self, soup): - for image in soup.findAll('img'): - if 'm.jpg' in image['src']: - image_big = image['src'] - image_big = image_big.replace('m.jpg', '.jpg') - image['src'] = image_big - logo = soup.find('tr') - logo.extract() - for tag in soup.findAll('tr'): - for strings in ['Wiadomość wydrukowana', 'copyright']: - if strings in self.tag_to_string(tag): - tag.extract() - return self.adeify_images(soup) + keep_only_tags = [ + dict(name='div', attrs={'class':'flol w510'}), + dict(name='div', attrs={'class':'main_tresc_news'}) + ]
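
Note: antyweb.recipe, kp.recipe and myapple_pl.recipe above all carry the same preprocess_html idiom that these whitespace fixes touch: every <a> tag that directly wraps a string is replaced by that string, so the built e-book contains no live links. A minimal standalone sketch of that shared pattern; the recipe class name, title and feed URL below are placeholders, not taken from the diff:

from calibre.web.feeds.news import BasicNewsRecipe

class LinkFlatteningExample(BasicNewsRecipe):
    # Hypothetical recipe used only to illustrate the idiom shared by
    # antyweb.recipe, kp.recipe and myapple_pl.recipe.
    title = u'Example'
    feeds = [(u'Feed', u'http://example.com/rss')]  # placeholder feed

    def preprocess_html(self, soup):
        # Replace each anchor that wraps plain text with the bare text,
        # stripping hyperlinks from the downloaded article HTML.
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
        return soup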
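
The gazeta_pl_krakow.recipe and gazeta_pl_warszawa.recipe hunks only re-indent skip_ad_pages; the logic stays as shown in the diff: when the downloaded page is an interstitial ad, it links to the real article through an <a class="btn"> element, and returning the raw HTML fetched from that link makes calibre render the article instead. Restated as a standalone sketch (title and feed are placeholders; returning None implicitly keeps the original page):

from calibre.web.feeds.news import BasicNewsRecipe

class AdSkippingExample(BasicNewsRecipe):
    # Hypothetical recipe showing the skip_ad_pages idiom from the two
    # gazeta.pl recipes in this diff.
    title = u'Example'
    feeds = [(u'Feed', u'http://example.com/rss')]  # placeholder feed

    def skip_ad_pages(self, soup):
        # Interstitial pages carry an <a class="btn"> link to the real article;
        # returning the raw HTML fetched from it replaces the current page.
        tag = soup.find(name='a', attrs={'class': 'btn'})
        if tag:
            return self.index_to_soup(tag['href'], raw=True)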
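
rynek_kolejowy.recipe and bankier_pl.recipe (which only lose trailing blank lines here) both rely on print_version to point calibre at a printer-friendly endpoint instead of the regular article page. The rynek_kolejowy variant is fully visible in the diff and amounts to pulling the article id out of the URL path; a plain-function sketch of it, with a made-up example URL in the comment:

def print_version(url):
    # e.g. 'http://www.rynek-kolejowy.pl/12345/jakis-artykul.html' (illustrative
    # URL shape, assumed only so that segment[3] is the numeric article id)
    segment = url.split('/')
    urlPart = segment[3]
    return 'http://www.rynek-kolejowy.pl/drukuj.php?id=' + urlPart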