diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index b02460695e..2a6cf9957d 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -3,7 +3,7 @@ import re class Adventure_zone(BasicNewsRecipe): title = u'Adventure Zone' __author__ = 'fenuks' - description = u'Adventure zone - adventure games from A to Z' + description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' category = 'games' language = 'pl' no_stylesheets = True @@ -78,4 +78,4 @@ class Adventure_zone(BasicNewsRecipe): a['href']=self.index + a['href'] return soup - \ No newline at end of file + diff --git a/recipes/archeowiesci.recipe b/recipes/archeowiesci.recipe index 6bcc9bef6c..57647d7469 100644 --- a/recipes/archeowiesci.recipe +++ b/recipes/archeowiesci.recipe @@ -5,6 +5,7 @@ class Archeowiesci(BasicNewsRecipe): __author__ = 'fenuks' category = 'archeology' language = 'pl' + description = u'Z pasją o przeszłości' cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg' oldest_article = 7 needs_subscription='optional' @@ -29,4 +30,4 @@ class Archeowiesci(BasicNewsRecipe): br['log'] = self.username br['pwd'] = self.password br.submit() - return br \ No newline at end of file + return br diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe index 2808fed6e1..b7a15a9809 100644 --- a/recipes/astro_news_pl.recipe +++ b/recipes/astro_news_pl.recipe @@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AstroNEWS(BasicNewsRecipe): title = u'AstroNEWS' __author__ = 'fenuks' - description = 'AstroNEWS- astronomy every day' + description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' category = 'astronomy, science' language = 'pl' oldest_article = 8 diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe index 0b92fdfa29..745ade420c 100644 --- a/recipes/astroflesz.recipe +++ b/recipes/astroflesz.recipe @@ -13,6 +13,7 @@ class Astroflesz(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + remove_attributes = ['style'] keep_only_tags = [dict(id="k2Container")] remove_tags_after = dict(name='div', attrs={'class':'itemLinks'}) remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})] diff --git a/recipes/astronomia_pl.recipe b/recipes/astronomia_pl.recipe index 89a0e4c889..aa84860976 100644 --- a/recipes/astronomia_pl.recipe +++ b/recipes/astronomia_pl.recipe @@ -3,7 +3,7 @@ import re class Astronomia_pl(BasicNewsRecipe): title = u'Astronomia.pl' __author__ = 'fenuks' - description = 'Astronomia - polish astronomy site' + description = u'Astronomia.pl jest edukacyjnym portalem skierowanym do uczniów, studentów i miłośników astronomii. Przedstawiamy gwiazdy, planety, galaktyki, czarne dziury i wiele innych tajemnic Wszechświata.' masthead_url = 'http://www.astronomia.pl/grafika/logo.gif' cover_url = 'http://www.astronomia.pl/grafika/logo.gif' category = 'astronomy, science' diff --git a/recipes/bash_org_pl.recipe b/recipes/bash_org_pl.recipe index a04f267ca3..b772b7c3b4 100644 --- a/recipes/bash_org_pl.recipe +++ b/recipes/bash_org_pl.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bash_org_pl(BasicNewsRecipe): title = u'Bash.org.pl' __author__ = 'fenuks' - description = 'Bash.org.pl - funny quotations from IRC discussions' + description = 'Bash.org.pl - zabawne cytaty z IRC' category = 'funny quotations, humour' language = 'pl' cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png' @@ -51,4 +51,4 @@ class Bash_org_pl(BasicNewsRecipe): feeds = [] feeds.append((u"Najnowsze", self.latest_articles())) feeds.append((u"Losowe", self.random_articles())) - return feeds \ No newline at end of file + return feeds diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe index 66d4f35e73..95c5488a24 100644 --- a/recipes/benchmark_pl.recipe +++ b/recipes/benchmark_pl.recipe @@ -3,14 +3,15 @@ import re class BenchmarkPl(BasicNewsRecipe): title = u'Benchmark.pl' __author__ = 'fenuks' - description = u'benchmark.pl -IT site' + description = u'benchmark.pl, recenzje i testy sprzętu, aktualności, rankingi, sterowniki, porady, opinie' masthead_url = 'http://www.benchmark.pl/i/logo-footer.png' - cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif' + cover_url = 'http://www.benchmark.pl/i/logo-dark.png' category = 'IT' language = 'pl' oldest_article = 8 max_articles_per_feed = 100 - no_stylesheets=True + no_stylesheets = True + remove_attributes = ['style'] preprocess_regexps = [(re.compile(ur'

 Zobacz poprzednie Opinie dnia:.*', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Więcej o .*?', re.DOTALL|re.IGNORECASE), lambda match: '')] keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')] remove_tags_after=dict(name='div', attrs={'class':'body'}) @@ -21,17 +22,18 @@ class BenchmarkPl(BasicNewsRecipe): def append_page(self, soup, appendtag): - nexturl = soup.find('span', attrs={'class':'next'}) - while nexturl is not None: - nexturl= self.INDEX + nexturl.parent['href'] - soup2 = self.index_to_soup(nexturl) - nexturl=soup2.find('span', attrs={'class':'next'}) + nexturl = soup.find(attrs={'class':'next'}) + while nexturl: + soup2 = self.index_to_soup(nexturl['href']) + nexturl = soup2.find(attrs={'class':'next'}) pagetext = soup2.find(name='div', attrs={'class':'body'}) appendtag.find('div', attrs={'class':'k_ster'}).extract() pos = len(appendtag.contents) appendtag.insert(pos, pagetext) - if appendtag.find('div', attrs={'class':'k_ster'}) is not None: + if appendtag.find('div', attrs={'class':'k_ster'}): appendtag.find('div', attrs={'class':'k_ster'}).extract() + for r in appendtag.findAll(attrs={'class':'changePage'}): + r.extract() def image_article(self, soup, appendtag): diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe index 4e19fbc6c1..6be7a2ae12 100644 --- a/recipes/cd_action.recipe +++ b/recipes/cd_action.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class CD_Action(BasicNewsRecipe): title = u'CD-Action' __author__ = 'fenuks' - description = 'cdaction.pl - polish games magazine site' + description = 'Strona CD-Action (CDA), największego w Polsce pisma dla graczy.Pełne wersje gier, newsy, recenzje, zapowiedzi, konkursy, forum, opinie, galerie screenów,trailery, filmiki, patche, teksty. Gry komputerowe (PC) oraz na konsole (PS3, XBOX 360).' category = 'games' language = 'pl' index='http://www.cdaction.pl' @@ -24,4 +24,4 @@ class CD_Action(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/computerworld_pl.recipe b/recipes/computerworld_pl.recipe index 2ec457e4de..e9aab68226 100644 --- a/recipes/computerworld_pl.recipe +++ b/recipes/computerworld_pl.recipe @@ -7,17 +7,13 @@ class Computerworld_pl(BasicNewsRecipe): description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne' category = 'IT' language = 'pl' - masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' - no_stylesheets=True + masthead_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' + cover_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' + no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100 - keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})] - remove_tags_after=dict(name='div', attrs={'class':'rMobi'}) - remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})] + keep_only_tags = [dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})] + remove_tags_after = dict(name='div', attrs={'class':'rMobi'}) + remove_tags = [dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})] feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')] - def get_cover_url(self): - soup = self.index_to_soup('http://www.computerworld.pl/') - cover=soup.find(name='img', attrs={'class':'prawo'}) - self.cover_url=cover['src'] - return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/conowego_pl.recipe b/recipes/conowego_pl.recipe index 8b4288ddcd..f180a756b2 100644 --- a/recipes/conowego_pl.recipe +++ b/recipes/conowego_pl.recipe @@ -4,11 +4,12 @@ class CoNowegoPl(BasicNewsRecipe): title = u'conowego.pl' __author__ = 'fenuks' description = u'Nowy wortal technologiczny oraz gazeta internetowa. Testy najnowszych produktów, fachowe porady i recenzje. U nas znajdziesz wszystko o elektronice użytkowej !' - cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png' + #cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png' category = 'IT, news' language = 'pl' oldest_article = 7 max_articles_per_feed = 100 + INDEX = 'http://www.conowego.pl/' no_stylesheets = True remove_empty_feeds = True use_embedded_content = False @@ -36,3 +37,10 @@ class CoNowegoPl(BasicNewsRecipe): for r in appendtag.findAll(attrs={'class':['pages', 'paginationWrap']}): r.extract() + + def get_cover_url(self): + soup = self.index_to_soup('http://www.conowego.pl/magazyn/') + tag = soup.find(attrs={'class':'ms_left'}) + if tag: + self.cover_url = self.INDEX + tag.find('img')['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/czas_gentlemanow.recipe b/recipes/czas_gentlemanow.recipe index 6df677f25f..009cc7e9dd 100644 --- a/recipes/czas_gentlemanow.recipe +++ b/recipes/czas_gentlemanow.recipe @@ -1,4 +1,5 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +import re from calibre.web.feeds.news import BasicNewsRecipe class CzasGentlemanow(BasicNewsRecipe): @@ -13,8 +14,9 @@ class CzasGentlemanow(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True + preprocess_regexps = [(re.compile(u'

Może Cię też zainteresować:

'), lambda m: '')] use_embedded_content = False keep_only_tags = [dict(name='div', attrs={'class':'content'})] - remove_tags = [dict(attrs={'class':'meta_comments'})] - remove_tags_after = dict(name='div', attrs={'class':'fblikebutton_button'}) + remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])] + remove_tags_after = dict(id='comments') feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')] diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index a4e24ac61b..708bdbb017 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -18,7 +18,7 @@ class Dobreprogramy_pl(BasicNewsRecipe): max_articles_per_feed = 100 preprocess_regexps = [(re.compile(ur'
Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...
'), lambda match: '') ] keep_only_tags=[dict(attrs={'class':['news', 'entry single']})] - remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze')] + remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')] #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index 603591e9f0..b74f18c006 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Dzieje(BasicNewsRecipe): title = u'dzieje.pl' __author__ = 'fenuks' - description = 'Dzieje - history of Poland' + description = 'Dzieje.pl - najlepszy portal informacyjno-edukacyjny dotyczący historii Polski XX wieku. Archiwalne fotografie, filmy, katalog postaci, quizy i konkursy.' cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png' category = 'history' language = 'pl' @@ -67,4 +67,4 @@ class Dzieje(BasicNewsRecipe): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] self.append_page(soup, soup.body) - return soup \ No newline at end of file + return soup diff --git a/recipes/eioba.recipe b/recipes/eioba.recipe index 1df79d64bd..f55138931c 100644 --- a/recipes/eioba.recipe +++ b/recipes/eioba.recipe @@ -4,6 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class eioba(BasicNewsRecipe): title = u'eioba' __author__ = 'fenuks' + description = u'eioba.pl - daj się przeczytać!' cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png' language = 'pl' oldest_article = 7 diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe index 34871ea04a..1886c2aecd 100644 --- a/recipes/elektroda_pl.recipe +++ b/recipes/elektroda_pl.recipe @@ -5,7 +5,7 @@ class Elektroda(BasicNewsRecipe): title = u'Elektroda' oldest_article = 8 __author__ = 'fenuks' - description = 'Elektroda.pl' + description = 'Międzynarodowy portal elektroniczny udostępniający bogate zasoby z dziedziny elektroniki oraz forum dyskusyjne.' cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif' category = 'electronics' language = 'pl' diff --git a/recipes/emuzica_pl.recipe b/recipes/emuzica_pl.recipe index 2fbf9ff514..0b3b207c5e 100644 --- a/recipes/emuzica_pl.recipe +++ b/recipes/emuzica_pl.recipe @@ -12,6 +12,7 @@ class eMuzyka(BasicNewsRecipe): no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100 + remove_attributes = ['style'] keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})] remove_tags=[dict(name='span', attrs={'id':'date'})] feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')] @@ -20,4 +21,4 @@ class eMuzyka(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index 6b014e8f93..b1d7f5c578 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -4,21 +4,21 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup class FilmWebPl(BasicNewsRecipe): title = u'FilmWeb' __author__ = 'fenuks' - description = 'FilmWeb - biggest polish movie site' - cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png' + description = 'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy. Największa baza filmów, seriali i aktorów, repertuar kin i tv, ...' + cover_url = 'http://gfx.filmweb.pl/n/logo-filmweb-bevel.jpg' category = 'movies' language = 'pl' - index='http://www.filmweb.pl' + index = 'http://www.filmweb.pl' oldest_article = 8 max_articles_per_feed = 100 - no_stylesheets= True - remove_empty_feeds=True + no_stylesheets = True + remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')] extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}' - remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})] + remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})] remove_attributes = ['style',] - keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})] + keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})] feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'), (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'), diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe index 1954fd7803..66864b8561 100644 --- a/recipes/focus_pl.recipe +++ b/recipes/focus_pl.recipe @@ -13,7 +13,7 @@ class FocusRecipe(BasicNewsRecipe): title = u'Focus' publisher = u'Gruner + Jahr Polska' category = u'News' - description = u'Newspaper' + description = u'Focus.pl - pierwszy w Polsce portal społecznościowy dla miłośników nauki. Tematyka: nauka, historia, cywilizacja, technika, przyroda, sport, gadżety' category = 'magazine' cover_url = '' remove_empty_feeds = True diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe index 99df46419a..a482390e0c 100644 --- a/recipes/fotoblogia_pl.recipe +++ b/recipes/fotoblogia_pl.recipe @@ -3,6 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Fotoblogia_pl(BasicNewsRecipe): title = u'Fotoblogia.pl' __author__ = 'fenuks' + description = u'Jeden z największych polskich blogów o fotografii.' category = 'photography' language = 'pl' masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg' @@ -11,6 +12,6 @@ class Fotoblogia_pl(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})] + keep_only_tags=[dict(name='div', attrs={'class':['post-view post-standard', 'photo-container']})] remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})] feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')] diff --git a/recipes/gazeta_pomorska.recipe b/recipes/gazeta_pomorska.recipe index c611a513c8..557fcb726c 100644 --- a/recipes/gazeta_pomorska.recipe +++ b/recipes/gazeta_pomorska.recipe @@ -1,102 +1,91 @@ -#!/usr/bin/env python - -# # Przed uzyciem przeczytaj komentarz w sekcji "feeds" - -__license__ = 'GPL v3' -__copyright__ = u'2010, Richard z forum.eksiazki.org' -'''pomorska.pl''' - import re from calibre.web.feeds.news import BasicNewsRecipe class GazetaPomorska(BasicNewsRecipe): title = u'Gazeta Pomorska' - publisher = u'Gazeta Pomorska' - description = u'Kujawy i Pomorze - wiadomo\u015bci' + __author__ = 'Richard z forum.eksiazki.org, fenuks' + description = u'Gazeta Pomorska - portal regionalny' + category = 'newspaper' language = 'pl' - __author__ = u'Richard z forum.eksiazki.org' - # # (dziekuje t3d z forum.eksiazki.org za testy) - oldest_article = 2 - max_articles_per_feed = 20 + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.pomorska.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True no_stylesheets = True - remove_javascript = True - preprocess_regexps = [ - (re.compile(r'', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - # # PomorskaRSS - wiadomosci kazdego typu, zakomentuj znakiem "#" - # # przed odkomentowaniem wiadomosci wybranego typu: - (u'PomorskaRSS', u'http://www.pomorska.pl/rss.xml') + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] - # # wiadomosci z regionu nie przypisane do okreslonego miasta: - # (u'Region', u'http://www.pomorska.pl/region.xml'), + feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'), + (u'Region', u'http://www.pomorska.pl/region.xml'), + (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), + (u'Nakło', u'http://www.pomorska.pl/naklo.xml'), + (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'), + (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'), + (u'Grudziądz', u'http://www.pomorska.pl/grudziadz.xml'), + (u'Inowrocław', u'http://www.pomorska.pl/inowroclaw.xml'), + (u'Toruń', u'http://www.pomorska.pl/torun.xml'), + (u'Włocławek', u'http://www.pomorska.pl/wloclawek.xml'), + (u'Aleksandrów Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'), + (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'), + (u'Chełmno', u'http://www.pomorska.pl/chelmno.xml'), + (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'), + (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'), + (u'Golub-Dobrzyń', u'http://www.pomorska.pl/golubdobrzyn.xml'), + (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'), + (u'Radziejów', u'http://www.pomorska.pl/radziejow.xml'), + (u'Rypin', u'http://www.pomorska.pl/rypin.xml'), + (u'Sępólno', u'http://www.pomorska.pl/sepolno.xml'), + (u'Świecie', u'http://www.pomorska.pl/swiecie.xml'), + (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'), + (u'Żnin', u'http://www.pomorska.pl/znin.xml'), + (u'Sport', u'http://www.pomorska.pl/sport.xml'), + (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'), + (u'Auto', u'http://www.pomorska.pl/moto.xml'), + (u'Dom', u'http://www.pomorska.pl/dom.xml'), + #(u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'), + (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')] - # # wiadomosci przypisane do miast: - # (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), - # (u'Nak\u0142o', u'http://www.pomorska.pl/naklo.xml'), - # (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'), - # (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'), - # (u'Grudzi\u0105dz', u'http://www.pomorska.pl/grudziadz.xml'), - # (u'Inowroc\u0142aw', u'http://www.pomorska.pl/inowroclaw.xml'), - # (u'Toru\u0144', u'http://www.pomorska.pl/torun.xml'), - # (u'W\u0142oc\u0142awek', u'http://www.pomorska.pl/wloclawek.xml'), - # (u'Aleksandr\u00f3w Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'), - # (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'), - # (u'Che\u0142mno', u'http://www.pomorska.pl/chelmno.xml'), - # (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'), - # (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'), - # (u'Golub Dobrzy\u0144', u'http://www.pomorska.pl/golubdobrzyn.xml'), - # (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'), - # (u'Radziej\u00f3w', u'http://www.pomorska.pl/radziejow.xml'), - # (u'Rypin', u'http://www.pomorska.pl/rypin.xml'), - # (u'S\u0119p\u00f3lno', u'http://www.pomorska.pl/sepolno.xml'), - # (u'\u015awiecie', u'http://www.pomorska.pl/swiecie.xml'), - # (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'), - # (u'\u017bnin', u'http://www.pomorska.pl/znin.xml') + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) - # # wiadomosci tematyczne (redundancja z region/miasta): - # (u'Sport', u'http://www.pomorska.pl/sport.xml'), - # (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'), - # (u'Auto', u'http://www.pomorska.pl/moto.xml'), - # (u'Dom', u'http://www.pomorska.pl/dom.xml'), - # (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'), - # (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml') - ] + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] - keep_only_tags = [dict(name='div', attrs={'id':'article'})] + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) - remove_tags = [ - dict(name='p', attrs={'id':'articleTags'}), - dict(name='div', attrs={'id':'articleEpaper'}), - dict(name='div', attrs={'id':'articleConnections'}), - dict(name='div', attrs={'class':'articleFacts'}), - dict(name='div', attrs={'id':'articleExternalLink'}), - dict(name='div', attrs={'id':'articleMultimedia'}), - dict(name='div', attrs={'id':'articleGalleries'}), - dict(name='div', attrs={'id':'articleAlarm'}), - dict(name='div', attrs={'id':'adholder_srodek1'}), - dict(name='div', attrs={'id':'articleVideo'}), - dict(name='a', attrs={'name':'fb_share'})] - - extra_css = '''h1 { font-size: 1.4em; } - h2 { font-size: 1.0em; }''' + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe index 633b80444a..475a259215 100644 --- a/recipes/gazeta_wyborcza.recipe +++ b/recipes/gazeta_wyborcza.recipe @@ -6,7 +6,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe): title = u'Gazeta.pl' __author__ = 'fenuks, Artur Stachecki' language = 'pl' - description = 'news from gazeta.pl' + description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.' category = 'newspaper' publication_type = 'newspaper' masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg' diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe index 3852f65d32..753e4a71d3 100644 --- a/recipes/gram_pl.recipe +++ b/recipes/gram_pl.recipe @@ -11,15 +11,14 @@ class Gram_pl(BasicNewsRecipe): max_articles_per_feed = 100 ignore_duplicate_articles = {'title', 'url'} no_stylesheets= True + remove_empty_feeds = True #extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' keep_only_tags= [dict(id='articleModule')] - remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter']})] + remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']})] feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), - (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'), - (u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'), - #(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss') - ] + (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles') + ] def parse_feeds (self): feeds = BasicNewsRecipe.parse_feeds(self) diff --git a/recipes/gry_online_pl.recipe b/recipes/gry_online_pl.recipe index 4b9282bdd3..2993cb0043 100644 --- a/recipes/gry_online_pl.recipe +++ b/recipes/gry_online_pl.recipe @@ -1,20 +1,23 @@ +import time from calibre.web.feeds.recipes import BasicNewsRecipe class GryOnlinePl(BasicNewsRecipe): title = u'Gry-Online.pl' __author__ = 'fenuks' - description = 'Gry-Online.pl - computer games' + description = u'Wiadomości o grach, recenzje, zapowiedzi. Encyklopedia Gier zawiera opisy gier na PC, konsole Xbox360, PS3 i inne platformy.' category = 'games' language = 'pl' oldest_article = 13 - INDEX= 'http://www.gry-online.pl/' - masthead_url='http://www.gry-online.pl/im/gry-online-logo.png' - cover_url='http://www.gry-online.pl/im/gry-online-logo.png' + INDEX = 'http://www.gry-online.pl/' + masthead_url = 'http://www.gry-online.pl/im/gry-online-logo.png' + cover_url = 'http://www.gry-online.pl/im/gry-online-logo.png' max_articles_per_feed = 100 - no_stylesheets= True - keep_only_tags=[dict(name='div', attrs={'class':['gc660', 'gc660 S013']})] - remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})] - feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] + no_stylesheets = True + keep_only_tags = [dict(name='div', attrs={'class':['gc660', 'gc660 S013', 'news_endpage_tit', 'news_container', 'news']})] + remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})] + feeds = [ + (u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), + ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] def append_page(self, soup, appendtag): @@ -24,7 +27,14 @@ class GryOnlinePl(BasicNewsRecipe): url_part = soup.find('link', attrs={'rel':'canonical'})['href'] url_part = url_part[25:].rpartition('?')[0] for nexturl in nexturls[1:-1]: - soup2 = self.index_to_soup('http://www.gry-online.pl/' + url_part + nexturl['href']) + finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href'] + for i in range(10): + try: + soup2 = self.index_to_soup(finalurl) + break + except: + print 'retrying in 0.5s' + time.sleep(0.5) pagetext = soup2.find(attrs={'class':'gc660'}) for r in pagetext.findAll(name='header'): r.extract() @@ -34,7 +44,42 @@ class GryOnlinePl(BasicNewsRecipe): appendtag.insert(pos, pagetext) for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry']}): r.extract() + else: + tag = appendtag.find('div', attrs={'class':'S018stronyr'}) + if tag: + nexturl = tag.a + url_part = soup.find('link', attrs={'rel':'canonical'})['href'] + url_part = url_part[25:].rpartition('?')[0] + while tag: + end = tag.find(attrs={'class':'right left-dead'}) + if end: + break + else: + nexturl = tag.a + finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href'] + for i in range(10): + try: + soup2 = self.index_to_soup(finalurl) + break + except: + print 'retrying in 0.5s' + time.sleep(0.5) + tag = soup2.find('div', attrs={'class':'S018stronyr'}) + pagetext = soup2.find(attrs={'class':'gc660'}) + for r in pagetext.findAll(name='header'): + r.extract() + for r in pagetext.findAll(attrs={'itemprop':'description'}): + r.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony']}): + r.extract() + def image_url_processor(self, baseurl, url): + if url.startswith('..'): + return url[2:] + else: + return url def preprocess_html(self, soup): self.append_page(soup, soup.body) diff --git a/recipes/icons/emuzica_pl.png b/recipes/icons/emuzica_pl.png new file mode 100644 index 0000000000..f708208bd2 Binary files /dev/null and b/recipes/icons/emuzica_pl.png differ diff --git a/recipes/icons/film_org_pl.png b/recipes/icons/film_org_pl.png new file mode 100644 index 0000000000..c1c26087dc Binary files /dev/null and b/recipes/icons/film_org_pl.png differ diff --git a/recipes/icons/nowa_fantastyka.png b/recipes/icons/nowa_fantastyka.png new file mode 100644 index 0000000000..5c71a5c892 Binary files /dev/null and b/recipes/icons/nowa_fantastyka.png differ diff --git a/recipes/icons/tablety_pl.png b/recipes/icons/tablety_pl.png new file mode 100644 index 0000000000..0c3a25813a Binary files /dev/null and b/recipes/icons/tablety_pl.png differ diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe index 8add89db94..b29e7e243b 100644 --- a/recipes/konflikty_zbrojne.recipe +++ b/recipes/konflikty_zbrojne.recipe @@ -7,7 +7,7 @@ class Konflikty(BasicNewsRecipe): __author__ = 'fenuks' cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg' language = 'pl' - description ='military news' + description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.' category='military, history' oldest_article = 7 max_articles_per_feed = 100 diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe index d1caa85950..c5fba54ab0 100644 --- a/recipes/kosmonauta_pl.recipe +++ b/recipes/kosmonauta_pl.recipe @@ -7,7 +7,7 @@ class Kosmonauta(BasicNewsRecipe): description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.' category = 'astronomy' language = 'pl' - cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' + cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' no_stylesheets = True INDEX = 'http://www.kosmonauta.net' oldest_article = 7 @@ -24,6 +24,5 @@ class Kosmonauta(BasicNewsRecipe): href = a['href'] if not href.startswith('http'): a['href'] = self.INDEX + href - print '%%%%%%%%%%%%%%%%%%%%%%%%%', a['href'] return soup - \ No newline at end of file + diff --git a/recipes/lomza.recipe b/recipes/lomza.recipe index d7e224d13d..2c31271624 100644 --- a/recipes/lomza.recipe +++ b/recipes/lomza.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Lomza(BasicNewsRecipe): title = u'4Lomza' __author__ = 'fenuks' - description = u'4Łomża - regional site' + description = u'Regionalny portal. Najświeższe informacje z regionu, kulturalne, sportowe. Ogłoszenia, baza biznesu, forum.' cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg' language = 'pl' oldest_article = 15 diff --git a/recipes/mlody_technik_pl.recipe b/recipes/mlody_technik_pl.recipe index 1eaa08d23a..4622e73909 100644 --- a/recipes/mlody_technik_pl.recipe +++ b/recipes/mlody_technik_pl.recipe @@ -7,7 +7,7 @@ class Mlody_technik(BasicNewsRecipe): description = u'Młody technik' category = 'science' language = 'pl' - cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg' + #cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg' no_stylesheets = True preprocess_regexps = [(re.compile(r"

Podobne

", re.IGNORECASE), lambda m: '')] oldest_article = 7 @@ -18,10 +18,17 @@ class Mlody_technik(BasicNewsRecipe): remove_tags = [dict(attrs={'class':'st-related-posts'})] remove_tags_after = dict(attrs={'class':'entry-content clearfix'}) feeds = [(u'Wszystko', u'http://www.mt.com.pl/feed'), - (u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'), + #(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'), (u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'), (u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'), (u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'), (u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'), (u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'), (u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')] + + def get_cover_url(self): + soup = self.index_to_soup('http://www.mt.com.pl/') + tag = soup.find(attrs={'class':'xoxo'}) + if tag: + self.cover_url = tag.find('img')['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/niebezpiecznik.recipe b/recipes/niebezpiecznik.recipe index b33a0a3513..a582a85aef 100644 --- a/recipes/niebezpiecznik.recipe +++ b/recipes/niebezpiecznik.recipe @@ -9,8 +9,8 @@ class Niebezpiecznik_pl(BasicNewsRecipe): oldest_article = 8 max_articles_per_feed = 100 no_stylesheets = True - cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png' - remove_tags=[dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})] - keep_only_tags= [dict(name='div', attrs={'class':['title', 'entry']})] + cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png' + remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})] + keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})] feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'), ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')] diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe index 7715b9826a..1808d54824 100644 --- a/recipes/nowa_fantastyka.recipe +++ b/recipes/nowa_fantastyka.recipe @@ -9,7 +9,7 @@ class Nowa_Fantastyka(BasicNewsRecipe): __modified_by__ = 'zaslav' language = 'pl' encoding='latin2' - description ='site for fantasy readers' + description = u'Strona dla miłośników fantastyki' category='fantasy' masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg' #extra_css='.tytul {font-size: 20px;}' #not working diff --git a/recipes/pc_foster.recipe b/recipes/pc_foster.recipe index ab8c2b66b1..64bb3d76ee 100644 --- a/recipes/pc_foster.recipe +++ b/recipes/pc_foster.recipe @@ -7,12 +7,12 @@ class PC_Foster(BasicNewsRecipe): description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.' category = 'IT' language = 'pl' - masthead_url='http://pcfoster.pl/public/images/logo.png' - cover_url= 'http://pcfoster.pl/public/images/logo.png' - no_stylesheets= True - remove_empty_feeds= True - keep_only_tags= [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})] - remove_tags=[dict(name='p', attrs={'class':'right'})] + masthead_url = 'http://pcfoster.pl/public/images/logo.png' + cover_url = 'http://pcfoster.pl/public/images/logo.png' + no_stylesheets = True + remove_empty_feeds = True + keep_only_tags = [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})] + remove_tags = [dict(name='p', attrs={'class':'right'})] feeds = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')] @@ -32,4 +32,4 @@ class PC_Foster(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) - return soup \ No newline at end of file + return soup diff --git a/recipes/polska_times.recipe b/recipes/polska_times.recipe index 21104f1299..6fd6734c8c 100644 --- a/recipes/polska_times.recipe +++ b/recipes/polska_times.recipe @@ -7,9 +7,11 @@ class PolskaTimes(BasicNewsRecipe): language = 'pl' masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17' oldest_article = 7 + encoding = 'iso-8859-2' max_articles_per_feed = 100 - remove_emty_feeds= True + remove_empty_feeds = True no_stylesheets = True + use_embedded_content = False ignore_duplicate_articles = {'title', 'url'} #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) diff --git a/recipes/spiders_web_pl.recipe b/recipes/spiders_web_pl.recipe index 00e3041a5c..e2f9e6834d 100644 --- a/recipes/spiders_web_pl.recipe +++ b/recipes/spiders_web_pl.recipe @@ -4,7 +4,7 @@ class SpidersWeb(BasicNewsRecipe): title = u"Spider's Web" oldest_article = 7 __author__ = 'fenuks' - description = u'Opinie i analizy na temat technologii' + description = u'Autorskie teksty popularnych blogerów, testy sprzętu i aplikacji, oraz wiele więcej.' cover_url = 'http://www.spidersweb.pl/wp-content/themes/new_sw/images/spidersweb.png' category = 'IT, WEB' language = 'pl' diff --git a/recipes/tablety_pl.recipe b/recipes/tablety_pl.recipe index 1c3f46f967..97a44f81c7 100644 --- a/recipes/tablety_pl.recipe +++ b/recipes/tablety_pl.recipe @@ -3,7 +3,7 @@ import re class Tablety_pl(BasicNewsRecipe): title = u'Tablety.pl' __author__ = 'fenuks' - description = u'tablety.pl - latest tablet news' + description = u'Tablety, gry i aplikacje na tablety.' masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png' cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png' category = 'IT' diff --git a/recipes/tanuki.recipe b/recipes/tanuki.recipe index a615763307..6f37c17e7c 100644 --- a/recipes/tanuki.recipe +++ b/recipes/tanuki.recipe @@ -4,6 +4,7 @@ class tanuki(BasicNewsRecipe): title = u'Tanuki' oldest_article = 7 __author__ = 'fenuks' + description = u'Tanuki - portal o anime i mandze.' category = 'anime, manga' language = 'pl' max_articles_per_feed = 100 @@ -42,4 +43,4 @@ class tanuki(BasicNewsRecipe): a['href']='http://manga.tanuki.pl' + a['href'] elif 'tanuki-czytelnia' in soup.title.string.lower(): a['href']='http://czytelnia.tanuki.pl' + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/tvn24.recipe b/recipes/tvn24.recipe index ed0eae574f..22647e9e02 100644 --- a/recipes/tvn24.recipe +++ b/recipes/tvn24.recipe @@ -8,8 +8,8 @@ class tvn24(BasicNewsRecipe): description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata' category = 'news' language = 'pl' - masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' - cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' + #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' + cover_url= 'http://www.qzdrowiu.pl/Upload/KnowQZdrowiu_PressOffice/TVN24_logo_575702b7-edce-4b6f-a41b-4395f9456f96_ff6d6ccf-528a-4b94-9e61-2fed727aba35.png' extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}' remove_empty_feeds = True remove_javascript = True diff --git a/recipes/ubuntu_pl.recipe b/recipes/ubuntu_pl.recipe index 84912e44fa..4d2340ad84 100644 --- a/recipes/ubuntu_pl.recipe +++ b/recipes/ubuntu_pl.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Ubuntu_pl(BasicNewsRecipe): title = u'UBUNTU.pl' __author__ = 'fenuks' - description = 'UBUNTU.pl - polish ubuntu community site' + description = 'Polskie forum użytkowników Ubuntu Linux. Projekty, porady i dyskusje, gotowe rozwiązania problemów.' masthead_url= 'http://ubuntu.pl/img/logo.jpg' cover_url = 'http://ubuntu.pl/img/logo.jpg' category = 'linux, IT'