From 338ab5d211d09eff95ca35a72d987fcb9be0d483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Wed, 6 Mar 2013 20:28:12 +0100 Subject: [PATCH 1/4] recipes provided by luka6000 --- recipes/biweekly.recipe | 51 ++++++++++++++++++++++++++++++++++ recipes/dwutygodnik.recipe | 51 ++++++++++++++++++++++++++++++++++ recipes/icons/biweekly.png | Bin 0 -> 603 bytes recipes/icons/dwutygodnik.png | Bin 0 -> 603 bytes recipes/jazzpress.recipe | 50 +++++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+) create mode 100644 recipes/biweekly.recipe create mode 100644 recipes/dwutygodnik.recipe create mode 100644 recipes/icons/biweekly.png create mode 100644 recipes/icons/dwutygodnik.png create mode 100644 recipes/jazzpress.recipe diff --git a/recipes/biweekly.recipe b/recipes/biweekly.recipe new file mode 100644 index 0000000000..affe0047d2 --- /dev/null +++ b/recipes/biweekly.recipe @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = u'Łukasz Grąbczewski 2011' +__version__ = '2.0' + +import re, os +from calibre.ptempfile import PersistentTemporaryFile +from calibre.ebooks.conversion.cli import main + +class biweekly(BasicNewsRecipe): + __author__ = u'Łukasz Grąbczewski' + title = 'Biweekly' + language = 'en_EN' + publisher = 'National Audiovisual Institute' + publication_type = 'magazine' + description = u'link with culture [English edition of Polish magazine]: literature, theatre, film, art, music, views, talks' + + conversion_options = { + 'authors' : 'Biweekly.pl' + ,'publisher' : publisher + ,'language' : language + ,'comments' : description + ,'no_default_epub_cover' : True + ,'preserve_cover_aspect_ratio': True + } + + def build_index(self): + browser = self.get_browser() + rc = browser.open('http://www.biweekly.pl/') + + # find the link + epublink = browser.find_link(text_regex=re.compile('ePUB VERSION')) + + # download ebook + self.report_progress(0,_('Downloading ePUB')) + response = browser.follow_link(epublink) + book_file = PersistentTemporaryFile(suffix='.epub') + book_file.write(response.read()) + book_file.close() + + # convert + self.report_progress(0.2,_('Converting to OEB')) + oebdir = self.output_dir + '/INPUT/' + main(['ebook-convert', book_file.name, oebdir]) + + # feed calibre + index = os.path.join(oebdir, 'content.opf') + + return index diff --git a/recipes/dwutygodnik.recipe b/recipes/dwutygodnik.recipe new file mode 100644 index 0000000000..d2e967d0ac --- /dev/null +++ b/recipes/dwutygodnik.recipe @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = u'Łukasz Grąbczewski 2011' +__version__ = '2.0' + +import re, os +from calibre.ptempfile import PersistentTemporaryFile +from calibre.ebooks.conversion.cli import main + +class dwutygodnik(BasicNewsRecipe): + __author__ = u'Łukasz Grąbczewski' + title = 'Dwutygodnik' + language = 'pl_PL' + publisher = 'Narodowy Instytut Audiowizualny' + publication_type = 'magazine' + description = u'Strona Kultury: literatura, teatr, film, sztuka, muzyka, felietony, rozmowy' + + conversion_options = { + 'authors' : 'Dwutygodnik.com' + ,'publisher' : publisher + ,'language' : language + ,'comments' : description + ,'no_default_epub_cover' : True + ,'preserve_cover_aspect_ratio': True + } + + def build_index(self): + browser = self.get_browser() + rc = browser.open('http://www.dwutygodnik.com/') + + # find the link + epublink = browser.find_link(text_regex=re.compile('Wersja ePub')) + + # download ebook + self.report_progress(0,_('Downloading ePUB')) + response = browser.follow_link(epublink) + book_file = PersistentTemporaryFile(suffix='.epub') + book_file.write(response.read()) + book_file.close() + + # convert + self.report_progress(0.2,_('Converting to OEB')) + oebdir = self.output_dir + '/INPUT/' + main(['ebook-convert', book_file.name, oebdir]) + + # feed calibre + index = os.path.join(oebdir, 'content.opf') + + return index diff --git a/recipes/icons/biweekly.png b/recipes/icons/biweekly.png new file mode 100644 index 0000000000000000000000000000000000000000..00356e091d5d65a4738ce58b63e885d02c34ec8f GIT binary patch literal 603 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5lsFLJ z6XN>+|NoC4KCD}_X3>HL5ANUp`}gmb&6`)PT)Ae|>Mx%^pFMNN%-BR(UO`<&b=J(8 zCWc1GjvoE>>zA#im8F@vhpVfqqEb^sqq&KxvA%(?m$#p{&#hax&YwG{qoMic&6}w3 zh~Dn*j<&Y4;*wi8Z@M};C&efH`}Z#*G_1X~b<%{1Dhi4hFI?>F>HYKPPjy9Qduv-& zdBwD;Q}5ortEZ(M9ugW68d{WJaPh(g69dDY+jq>HGk4zHx%21F+p%r?)vH%$Oq(wD z(4YtC7se!ScNc~ZR#^`qhqJ&VvKUB%*d7dSC$sHa&K%Ck;oRfUv;S zfZ*pB8@L-9)cU8HO=_6M#^2A!?Bo#M-|imI>v^ET{j`9`LDrrv6+ON_zGojWJriJ$ zRM-*`6?yx{jT^Uao)&oQuK;wC8Lw+i(2;dON2!*$MwFx^mZVxG7o`Fz1|tJQV_gFv zG7T{@ure~UGP2M%Ftjo-c%IUgf}$ZeKP5A*5~RTZs9M*^AjH7f%D~LZ&_LV32&lpQ Ssoo@@1_n=8KbLh*2~7YWao?8! literal 0 HcmV?d00001 diff --git a/recipes/icons/dwutygodnik.png b/recipes/icons/dwutygodnik.png new file mode 100644 index 0000000000000000000000000000000000000000..00356e091d5d65a4738ce58b63e885d02c34ec8f GIT binary patch literal 603 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5lsFLJ z6XN>+|NoC4KCD}_X3>HL5ANUp`}gmb&6`)PT)Ae|>Mx%^pFMNN%-BR(UO`<&b=J(8 zCWc1GjvoE>>zA#im8F@vhpVfqqEb^sqq&KxvA%(?m$#p{&#hax&YwG{qoMic&6}w3 zh~Dn*j<&Y4;*wi8Z@M};C&efH`}Z#*G_1X~b<%{1Dhi4hFI?>F>HYKPPjy9Qduv-& zdBwD;Q}5ortEZ(M9ugW68d{WJaPh(g69dDY+jq>HGk4zHx%21F+p%r?)vH%$Oq(wD z(4YtC7se!ScNc~ZR#^`qhqJ&VvKUB%*d7dSC$sHa&K%Ck;oRfUv;S zfZ*pB8@L-9)cU8HO=_6M#^2A!?Bo#M-|imI>v^ET{j`9`LDrrv6+ON_zGojWJriJ$ zRM-*`6?yx{jT^Uao)&oQuK;wC8Lw+i(2;dON2!*$MwFx^mZVxG7o`Fz1|tJQV_gFv zG7T{@ure~UGP2M%Ftjo-c%IUgf}$ZeKP5A*5~RTZs9M*^AjH7f%D~LZ&_LV32&lpQ Ssoo@@1_n=8KbLh*2~7YWao?8! literal 0 HcmV?d00001 diff --git a/recipes/jazzpress.recipe b/recipes/jazzpress.recipe new file mode 100644 index 0000000000..5370a4732d --- /dev/null +++ b/recipes/jazzpress.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = u'Łukasz Grąbczewski 2011-2013' +__version__ = '2.0' + +import re, zipfile, os +from calibre.ptempfile import PersistentTemporaryFile +from calibre.ebooks.conversion.cli import main + +class jazzpress(BasicNewsRecipe): + __author__ = u'Łukasz Grąbczewski' + title = 'JazzPRESS' + language = 'pl' + publisher = 'Fundacja Popularyzacji Muzyki Jazzowej EuroJAZZ' + publication_type = 'magazine' + description = u'Internetowa gazeta poświęcona muzyce improwizowanej' + + conversion_options = { + 'authors' : 'Fundacja Popularyzacji Muzyki Jazzowej EuroJAZZ' + ,'publisher' : publisher + ,'language' : language + ,'preserve_cover_aspect_ratio': True + ,'remove_first_image': True + } + + def build_index(self): + browser = self.get_browser() + rc = browser.open('http://radiojazz.fm/') + + # find the link + epublink = browser.find_link(url_regex=re.compile('e_jazzpress\d\d\d\d\_epub')) + + # download ebook + self.report_progress(0,_('Downloading ePUB')) + response = browser.follow_link(epublink) + book_file = PersistentTemporaryFile(suffix='.epub') + book_file.write(response.read()) + book_file.close() + + # convert + self.report_progress(0.2,_('Converting to OEB')) + oebdir = self.output_dir + '/INPUT/' + main(['ebook-convert', book_file.name, oebdir]) + + # feed calibre + index = os.path.join(oebdir, 'content.opf') + + return index From 72b925e22ad73ef61ea7673a41be3e184b08618e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Wed, 6 Mar 2013 20:30:03 +0100 Subject: [PATCH 2/4] recipes provided by luka6000 --- recipes/icons/zycie_warszawy.png | Bin 0 -> 856 bytes recipes/zycie_warszawy.recipe | 46 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 recipes/icons/zycie_warszawy.png create mode 100644 recipes/zycie_warszawy.recipe diff --git a/recipes/icons/zycie_warszawy.png b/recipes/icons/zycie_warszawy.png new file mode 100644 index 0000000000000000000000000000000000000000..a13d1a018cec7f601552b2fca8a52e679e62632c GIT binary patch literal 856 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)R!9I z6XN>+|Np;#|FyNYPU!1<_wL>HZQFM4*tvAclGUqL?ccZW!nyOme*Q{LN?NsI#nvrb z)~{XbU}Kk)m3`yJjjvz7e*F0H%a<>oKY#x5e?OKwojch)ydAjrK$Pv-@m=xJ=3R7b#Zj^_VAoLd-m>~JOBOt z+uYdX?e6j6{f7(Z&mRYZW5;W%s+TQZ($d(J5EtLw*)?zOoU>=o+FDv2*uVeJpFgu_ z%~~{nL4J0wi-XhaSFh*IohKglw^K_Q{cP-c#YE z5>oY^@hFtD)DGgGoz%J3cwBZFV8JbVBA+{-|J zsFt`!l%yn~I83o$UTGB&X?wa_*&v@$SwQ1j Date: Wed, 6 Mar 2013 20:34:04 +0100 Subject: [PATCH 3/4] fixes for old recipes made by fenuks --- recipes/adventure_zone_pl.recipe | 4 +- recipes/archeowiesci.recipe | 3 +- recipes/astro_news_pl.recipe | 2 +- recipes/astroflesz.recipe | 1 + recipes/astronomia_pl.recipe | 2 +- recipes/bash_org_pl.recipe | 4 +- recipes/benchmark_pl.recipe | 20 ++-- recipes/cd_action.recipe | 4 +- recipes/computerworld_pl.recipe | 16 ++- recipes/conowego_pl.recipe | 10 +- recipes/czas_gentlemanow.recipe | 6 +- recipes/dobreprogamy.recipe | 2 +- recipes/dzieje_pl.recipe | 4 +- recipes/eioba.recipe | 1 + recipes/elektroda_pl.recipe | 2 +- recipes/emuzica_pl.recipe | 3 +- recipes/film_web.recipe | 14 +-- recipes/focus_pl.recipe | 2 +- recipes/fotoblogia_pl.recipe | 3 +- recipes/gazeta_pomorska.recipe | 165 ++++++++++++++---------------- recipes/gazeta_wyborcza.recipe | 2 +- recipes/gram_pl.recipe | 9 +- recipes/gry_online_pl.recipe | 63 ++++++++++-- recipes/icons/emuzica_pl.png | Bin 0 -> 760 bytes recipes/icons/film_org_pl.png | Bin 0 -> 762 bytes recipes/icons/nowa_fantastyka.png | Bin 0 -> 1747 bytes recipes/icons/tablety_pl.png | Bin 0 -> 834 bytes recipes/konflikty_zbrojne.recipe | 2 +- recipes/kosmonauta_pl.recipe | 5 +- recipes/lomza.recipe | 2 +- recipes/mlody_technik_pl.recipe | 11 +- recipes/niebezpiecznik.recipe | 6 +- recipes/nowa_fantastyka.recipe | 2 +- recipes/pc_foster.recipe | 14 +-- recipes/polska_times.recipe | 4 +- recipes/spiders_web_pl.recipe | 2 +- recipes/tablety_pl.recipe | 2 +- recipes/tanuki.recipe | 3 +- recipes/tvn24.recipe | 4 +- recipes/ubuntu_pl.recipe | 2 +- 40 files changed, 228 insertions(+), 173 deletions(-) create mode 100644 recipes/icons/emuzica_pl.png create mode 100644 recipes/icons/film_org_pl.png create mode 100644 recipes/icons/nowa_fantastyka.png create mode 100644 recipes/icons/tablety_pl.png diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index b02460695e..2a6cf9957d 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -3,7 +3,7 @@ import re class Adventure_zone(BasicNewsRecipe): title = u'Adventure Zone' __author__ = 'fenuks' - description = u'Adventure zone - adventure games from A to Z' + description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.' category = 'games' language = 'pl' no_stylesheets = True @@ -78,4 +78,4 @@ class Adventure_zone(BasicNewsRecipe): a['href']=self.index + a['href'] return soup - \ No newline at end of file + diff --git a/recipes/archeowiesci.recipe b/recipes/archeowiesci.recipe index 6bcc9bef6c..57647d7469 100644 --- a/recipes/archeowiesci.recipe +++ b/recipes/archeowiesci.recipe @@ -5,6 +5,7 @@ class Archeowiesci(BasicNewsRecipe): __author__ = 'fenuks' category = 'archeology' language = 'pl' + description = u'Z pasją o przeszłości' cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg' oldest_article = 7 needs_subscription='optional' @@ -29,4 +30,4 @@ class Archeowiesci(BasicNewsRecipe): br['log'] = self.username br['pwd'] = self.password br.submit() - return br \ No newline at end of file + return br diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe index 2808fed6e1..b7a15a9809 100644 --- a/recipes/astro_news_pl.recipe +++ b/recipes/astro_news_pl.recipe @@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class AstroNEWS(BasicNewsRecipe): title = u'AstroNEWS' __author__ = 'fenuks' - description = 'AstroNEWS- astronomy every day' + description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.' category = 'astronomy, science' language = 'pl' oldest_article = 8 diff --git a/recipes/astroflesz.recipe b/recipes/astroflesz.recipe index 0b92fdfa29..745ade420c 100644 --- a/recipes/astroflesz.recipe +++ b/recipes/astroflesz.recipe @@ -13,6 +13,7 @@ class Astroflesz(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + remove_attributes = ['style'] keep_only_tags = [dict(id="k2Container")] remove_tags_after = dict(name='div', attrs={'class':'itemLinks'}) remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})] diff --git a/recipes/astronomia_pl.recipe b/recipes/astronomia_pl.recipe index 89a0e4c889..aa84860976 100644 --- a/recipes/astronomia_pl.recipe +++ b/recipes/astronomia_pl.recipe @@ -3,7 +3,7 @@ import re class Astronomia_pl(BasicNewsRecipe): title = u'Astronomia.pl' __author__ = 'fenuks' - description = 'Astronomia - polish astronomy site' + description = u'Astronomia.pl jest edukacyjnym portalem skierowanym do uczniów, studentów i miłośników astronomii. Przedstawiamy gwiazdy, planety, galaktyki, czarne dziury i wiele innych tajemnic Wszechświata.' masthead_url = 'http://www.astronomia.pl/grafika/logo.gif' cover_url = 'http://www.astronomia.pl/grafika/logo.gif' category = 'astronomy, science' diff --git a/recipes/bash_org_pl.recipe b/recipes/bash_org_pl.recipe index a04f267ca3..b772b7c3b4 100644 --- a/recipes/bash_org_pl.recipe +++ b/recipes/bash_org_pl.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Bash_org_pl(BasicNewsRecipe): title = u'Bash.org.pl' __author__ = 'fenuks' - description = 'Bash.org.pl - funny quotations from IRC discussions' + description = 'Bash.org.pl - zabawne cytaty z IRC' category = 'funny quotations, humour' language = 'pl' cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png' @@ -51,4 +51,4 @@ class Bash_org_pl(BasicNewsRecipe): feeds = [] feeds.append((u"Najnowsze", self.latest_articles())) feeds.append((u"Losowe", self.random_articles())) - return feeds \ No newline at end of file + return feeds diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe index 66d4f35e73..95c5488a24 100644 --- a/recipes/benchmark_pl.recipe +++ b/recipes/benchmark_pl.recipe @@ -3,14 +3,15 @@ import re class BenchmarkPl(BasicNewsRecipe): title = u'Benchmark.pl' __author__ = 'fenuks' - description = u'benchmark.pl -IT site' + description = u'benchmark.pl, recenzje i testy sprzętu, aktualności, rankingi, sterowniki, porady, opinie' masthead_url = 'http://www.benchmark.pl/i/logo-footer.png' - cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif' + cover_url = 'http://www.benchmark.pl/i/logo-dark.png' category = 'IT' language = 'pl' oldest_article = 8 max_articles_per_feed = 100 - no_stylesheets=True + no_stylesheets = True + remove_attributes = ['style'] preprocess_regexps = [(re.compile(ur'

 Zobacz poprzednie Opinie dnia:.*', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Więcej o .*?', re.DOTALL|re.IGNORECASE), lambda match: '')] keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')] remove_tags_after=dict(name='div', attrs={'class':'body'}) @@ -21,17 +22,18 @@ class BenchmarkPl(BasicNewsRecipe): def append_page(self, soup, appendtag): - nexturl = soup.find('span', attrs={'class':'next'}) - while nexturl is not None: - nexturl= self.INDEX + nexturl.parent['href'] - soup2 = self.index_to_soup(nexturl) - nexturl=soup2.find('span', attrs={'class':'next'}) + nexturl = soup.find(attrs={'class':'next'}) + while nexturl: + soup2 = self.index_to_soup(nexturl['href']) + nexturl = soup2.find(attrs={'class':'next'}) pagetext = soup2.find(name='div', attrs={'class':'body'}) appendtag.find('div', attrs={'class':'k_ster'}).extract() pos = len(appendtag.contents) appendtag.insert(pos, pagetext) - if appendtag.find('div', attrs={'class':'k_ster'}) is not None: + if appendtag.find('div', attrs={'class':'k_ster'}): appendtag.find('div', attrs={'class':'k_ster'}).extract() + for r in appendtag.findAll(attrs={'class':'changePage'}): + r.extract() def image_article(self, soup, appendtag): diff --git a/recipes/cd_action.recipe b/recipes/cd_action.recipe index 4e19fbc6c1..6be7a2ae12 100644 --- a/recipes/cd_action.recipe +++ b/recipes/cd_action.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class CD_Action(BasicNewsRecipe): title = u'CD-Action' __author__ = 'fenuks' - description = 'cdaction.pl - polish games magazine site' + description = 'Strona CD-Action (CDA), największego w Polsce pisma dla graczy.Pełne wersje gier, newsy, recenzje, zapowiedzi, konkursy, forum, opinie, galerie screenów,trailery, filmiki, patche, teksty. Gry komputerowe (PC) oraz na konsole (PS3, XBOX 360).' category = 'games' language = 'pl' index='http://www.cdaction.pl' @@ -24,4 +24,4 @@ class CD_Action(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/computerworld_pl.recipe b/recipes/computerworld_pl.recipe index 2ec457e4de..e9aab68226 100644 --- a/recipes/computerworld_pl.recipe +++ b/recipes/computerworld_pl.recipe @@ -7,17 +7,13 @@ class Computerworld_pl(BasicNewsRecipe): description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne' category = 'IT' language = 'pl' - masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' - no_stylesheets=True + masthead_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' + cover_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif' + no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100 - keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})] - remove_tags_after=dict(name='div', attrs={'class':'rMobi'}) - remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})] + keep_only_tags = [dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})] + remove_tags_after = dict(name='div', attrs={'class':'rMobi'}) + remove_tags = [dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})] feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')] - def get_cover_url(self): - soup = self.index_to_soup('http://www.computerworld.pl/') - cover=soup.find(name='img', attrs={'class':'prawo'}) - self.cover_url=cover['src'] - return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/conowego_pl.recipe b/recipes/conowego_pl.recipe index 8b4288ddcd..f180a756b2 100644 --- a/recipes/conowego_pl.recipe +++ b/recipes/conowego_pl.recipe @@ -4,11 +4,12 @@ class CoNowegoPl(BasicNewsRecipe): title = u'conowego.pl' __author__ = 'fenuks' description = u'Nowy wortal technologiczny oraz gazeta internetowa. Testy najnowszych produktów, fachowe porady i recenzje. U nas znajdziesz wszystko o elektronice użytkowej !' - cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png' + #cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png' category = 'IT, news' language = 'pl' oldest_article = 7 max_articles_per_feed = 100 + INDEX = 'http://www.conowego.pl/' no_stylesheets = True remove_empty_feeds = True use_embedded_content = False @@ -36,3 +37,10 @@ class CoNowegoPl(BasicNewsRecipe): for r in appendtag.findAll(attrs={'class':['pages', 'paginationWrap']}): r.extract() + + def get_cover_url(self): + soup = self.index_to_soup('http://www.conowego.pl/magazyn/') + tag = soup.find(attrs={'class':'ms_left'}) + if tag: + self.cover_url = self.INDEX + tag.find('img')['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/czas_gentlemanow.recipe b/recipes/czas_gentlemanow.recipe index 6df677f25f..009cc7e9dd 100644 --- a/recipes/czas_gentlemanow.recipe +++ b/recipes/czas_gentlemanow.recipe @@ -1,4 +1,5 @@ # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai +import re from calibre.web.feeds.news import BasicNewsRecipe class CzasGentlemanow(BasicNewsRecipe): @@ -13,8 +14,9 @@ class CzasGentlemanow(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True + preprocess_regexps = [(re.compile(u'

Może Cię też zainteresować:

'), lambda m: '')] use_embedded_content = False keep_only_tags = [dict(name='div', attrs={'class':'content'})] - remove_tags = [dict(attrs={'class':'meta_comments'})] - remove_tags_after = dict(name='div', attrs={'class':'fblikebutton_button'}) + remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])] + remove_tags_after = dict(id='comments') feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')] diff --git a/recipes/dobreprogamy.recipe b/recipes/dobreprogamy.recipe index a4e24ac61b..708bdbb017 100644 --- a/recipes/dobreprogamy.recipe +++ b/recipes/dobreprogamy.recipe @@ -18,7 +18,7 @@ class Dobreprogramy_pl(BasicNewsRecipe): max_articles_per_feed = 100 preprocess_regexps = [(re.compile(ur'
Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...
'), lambda match: '') ] keep_only_tags=[dict(attrs={'class':['news', 'entry single']})] - remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze')] + remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')] #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})] feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'), ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')] diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index 603591e9f0..b74f18c006 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Dzieje(BasicNewsRecipe): title = u'dzieje.pl' __author__ = 'fenuks' - description = 'Dzieje - history of Poland' + description = 'Dzieje.pl - najlepszy portal informacyjno-edukacyjny dotyczący historii Polski XX wieku. Archiwalne fotografie, filmy, katalog postaci, quizy i konkursy.' cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png' category = 'history' language = 'pl' @@ -67,4 +67,4 @@ class Dzieje(BasicNewsRecipe): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] self.append_page(soup, soup.body) - return soup \ No newline at end of file + return soup diff --git a/recipes/eioba.recipe b/recipes/eioba.recipe index 1df79d64bd..f55138931c 100644 --- a/recipes/eioba.recipe +++ b/recipes/eioba.recipe @@ -4,6 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class eioba(BasicNewsRecipe): title = u'eioba' __author__ = 'fenuks' + description = u'eioba.pl - daj się przeczytać!' cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png' language = 'pl' oldest_article = 7 diff --git a/recipes/elektroda_pl.recipe b/recipes/elektroda_pl.recipe index 34871ea04a..1886c2aecd 100644 --- a/recipes/elektroda_pl.recipe +++ b/recipes/elektroda_pl.recipe @@ -5,7 +5,7 @@ class Elektroda(BasicNewsRecipe): title = u'Elektroda' oldest_article = 8 __author__ = 'fenuks' - description = 'Elektroda.pl' + description = 'Międzynarodowy portal elektroniczny udostępniający bogate zasoby z dziedziny elektroniki oraz forum dyskusyjne.' cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif' category = 'electronics' language = 'pl' diff --git a/recipes/emuzica_pl.recipe b/recipes/emuzica_pl.recipe index 2fbf9ff514..0b3b207c5e 100644 --- a/recipes/emuzica_pl.recipe +++ b/recipes/emuzica_pl.recipe @@ -12,6 +12,7 @@ class eMuzyka(BasicNewsRecipe): no_stylesheets = True oldest_article = 7 max_articles_per_feed = 100 + remove_attributes = ['style'] keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})] remove_tags=[dict(name='span', attrs={'id':'date'})] feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')] @@ -20,4 +21,4 @@ class eMuzyka(BasicNewsRecipe): for a in soup('a'): if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']: a['href']=self.index + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/film_web.recipe b/recipes/film_web.recipe index 6b014e8f93..b1d7f5c578 100644 --- a/recipes/film_web.recipe +++ b/recipes/film_web.recipe @@ -4,21 +4,21 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup class FilmWebPl(BasicNewsRecipe): title = u'FilmWeb' __author__ = 'fenuks' - description = 'FilmWeb - biggest polish movie site' - cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png' + description = 'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy. Największa baza filmów, seriali i aktorów, repertuar kin i tv, ...' + cover_url = 'http://gfx.filmweb.pl/n/logo-filmweb-bevel.jpg' category = 'movies' language = 'pl' - index='http://www.filmweb.pl' + index = 'http://www.filmweb.pl' oldest_article = 8 max_articles_per_feed = 100 - no_stylesheets= True - remove_empty_feeds=True + no_stylesheets = True + remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')] extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}' - remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})] + remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})] remove_attributes = ['style',] - keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})] + keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})] feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'), (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'), (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'), diff --git a/recipes/focus_pl.recipe b/recipes/focus_pl.recipe index 1954fd7803..66864b8561 100644 --- a/recipes/focus_pl.recipe +++ b/recipes/focus_pl.recipe @@ -13,7 +13,7 @@ class FocusRecipe(BasicNewsRecipe): title = u'Focus' publisher = u'Gruner + Jahr Polska' category = u'News' - description = u'Newspaper' + description = u'Focus.pl - pierwszy w Polsce portal społecznościowy dla miłośników nauki. Tematyka: nauka, historia, cywilizacja, technika, przyroda, sport, gadżety' category = 'magazine' cover_url = '' remove_empty_feeds = True diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe index 99df46419a..a482390e0c 100644 --- a/recipes/fotoblogia_pl.recipe +++ b/recipes/fotoblogia_pl.recipe @@ -3,6 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Fotoblogia_pl(BasicNewsRecipe): title = u'Fotoblogia.pl' __author__ = 'fenuks' + description = u'Jeden z największych polskich blogów o fotografii.' category = 'photography' language = 'pl' masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg' @@ -11,6 +12,6 @@ class Fotoblogia_pl(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})] + keep_only_tags=[dict(name='div', attrs={'class':['post-view post-standard', 'photo-container']})] remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})] feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')] diff --git a/recipes/gazeta_pomorska.recipe b/recipes/gazeta_pomorska.recipe index c611a513c8..557fcb726c 100644 --- a/recipes/gazeta_pomorska.recipe +++ b/recipes/gazeta_pomorska.recipe @@ -1,102 +1,91 @@ -#!/usr/bin/env python - -# # Przed uzyciem przeczytaj komentarz w sekcji "feeds" - -__license__ = 'GPL v3' -__copyright__ = u'2010, Richard z forum.eksiazki.org' -'''pomorska.pl''' - import re from calibre.web.feeds.news import BasicNewsRecipe class GazetaPomorska(BasicNewsRecipe): title = u'Gazeta Pomorska' - publisher = u'Gazeta Pomorska' - description = u'Kujawy i Pomorze - wiadomo\u015bci' + __author__ = 'Richard z forum.eksiazki.org, fenuks' + description = u'Gazeta Pomorska - portal regionalny' + category = 'newspaper' language = 'pl' - __author__ = u'Richard z forum.eksiazki.org' - # # (dziekuje t3d z forum.eksiazki.org za testy) - oldest_article = 2 - max_articles_per_feed = 20 + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.pomorska.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True no_stylesheets = True - remove_javascript = True - preprocess_regexps = [ - (re.compile(r'', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - # # PomorskaRSS - wiadomosci kazdego typu, zakomentuj znakiem "#" - # # przed odkomentowaniem wiadomosci wybranego typu: - (u'PomorskaRSS', u'http://www.pomorska.pl/rss.xml') + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] - # # wiadomosci z regionu nie przypisane do okreslonego miasta: - # (u'Region', u'http://www.pomorska.pl/region.xml'), + feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'), + (u'Region', u'http://www.pomorska.pl/region.xml'), + (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), + (u'Nakło', u'http://www.pomorska.pl/naklo.xml'), + (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'), + (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'), + (u'Grudziądz', u'http://www.pomorska.pl/grudziadz.xml'), + (u'Inowrocław', u'http://www.pomorska.pl/inowroclaw.xml'), + (u'Toruń', u'http://www.pomorska.pl/torun.xml'), + (u'Włocławek', u'http://www.pomorska.pl/wloclawek.xml'), + (u'Aleksandrów Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'), + (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'), + (u'Chełmno', u'http://www.pomorska.pl/chelmno.xml'), + (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'), + (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'), + (u'Golub-Dobrzyń', u'http://www.pomorska.pl/golubdobrzyn.xml'), + (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'), + (u'Radziejów', u'http://www.pomorska.pl/radziejow.xml'), + (u'Rypin', u'http://www.pomorska.pl/rypin.xml'), + (u'Sępólno', u'http://www.pomorska.pl/sepolno.xml'), + (u'Świecie', u'http://www.pomorska.pl/swiecie.xml'), + (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'), + (u'Żnin', u'http://www.pomorska.pl/znin.xml'), + (u'Sport', u'http://www.pomorska.pl/sport.xml'), + (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'), + (u'Auto', u'http://www.pomorska.pl/moto.xml'), + (u'Dom', u'http://www.pomorska.pl/dom.xml'), + #(u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'), + (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')] - # # wiadomosci przypisane do miast: - # (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), - # (u'Nak\u0142o', u'http://www.pomorska.pl/naklo.xml'), - # (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'), - # (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'), - # (u'Grudzi\u0105dz', u'http://www.pomorska.pl/grudziadz.xml'), - # (u'Inowroc\u0142aw', u'http://www.pomorska.pl/inowroclaw.xml'), - # (u'Toru\u0144', u'http://www.pomorska.pl/torun.xml'), - # (u'W\u0142oc\u0142awek', u'http://www.pomorska.pl/wloclawek.xml'), - # (u'Aleksandr\u00f3w Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'), - # (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'), - # (u'Che\u0142mno', u'http://www.pomorska.pl/chelmno.xml'), - # (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'), - # (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'), - # (u'Golub Dobrzy\u0144', u'http://www.pomorska.pl/golubdobrzyn.xml'), - # (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'), - # (u'Radziej\u00f3w', u'http://www.pomorska.pl/radziejow.xml'), - # (u'Rypin', u'http://www.pomorska.pl/rypin.xml'), - # (u'S\u0119p\u00f3lno', u'http://www.pomorska.pl/sepolno.xml'), - # (u'\u015awiecie', u'http://www.pomorska.pl/swiecie.xml'), - # (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'), - # (u'\u017bnin', u'http://www.pomorska.pl/znin.xml') + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) - # # wiadomosci tematyczne (redundancja z region/miasta): - # (u'Sport', u'http://www.pomorska.pl/sport.xml'), - # (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'), - # (u'Auto', u'http://www.pomorska.pl/moto.xml'), - # (u'Dom', u'http://www.pomorska.pl/dom.xml'), - # (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'), - # (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml') - ] + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] - keep_only_tags = [dict(name='div', attrs={'id':'article'})] + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) - remove_tags = [ - dict(name='p', attrs={'id':'articleTags'}), - dict(name='div', attrs={'id':'articleEpaper'}), - dict(name='div', attrs={'id':'articleConnections'}), - dict(name='div', attrs={'class':'articleFacts'}), - dict(name='div', attrs={'id':'articleExternalLink'}), - dict(name='div', attrs={'id':'articleMultimedia'}), - dict(name='div', attrs={'id':'articleGalleries'}), - dict(name='div', attrs={'id':'articleAlarm'}), - dict(name='div', attrs={'id':'adholder_srodek1'}), - dict(name='div', attrs={'id':'articleVideo'}), - dict(name='a', attrs={'name':'fb_share'})] - - extra_css = '''h1 { font-size: 1.4em; } - h2 { font-size: 1.0em; }''' + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe index 633b80444a..475a259215 100644 --- a/recipes/gazeta_wyborcza.recipe +++ b/recipes/gazeta_wyborcza.recipe @@ -6,7 +6,7 @@ class Gazeta_Wyborcza(BasicNewsRecipe): title = u'Gazeta.pl' __author__ = 'fenuks, Artur Stachecki' language = 'pl' - description = 'news from gazeta.pl' + description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.' category = 'newspaper' publication_type = 'newspaper' masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg' diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe index 3852f65d32..753e4a71d3 100644 --- a/recipes/gram_pl.recipe +++ b/recipes/gram_pl.recipe @@ -11,15 +11,14 @@ class Gram_pl(BasicNewsRecipe): max_articles_per_feed = 100 ignore_duplicate_articles = {'title', 'url'} no_stylesheets= True + remove_empty_feeds = True #extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' keep_only_tags= [dict(id='articleModule')] - remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter']})] + remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']})] feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), - (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'), - (u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'), - #(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss') - ] + (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles') + ] def parse_feeds (self): feeds = BasicNewsRecipe.parse_feeds(self) diff --git a/recipes/gry_online_pl.recipe b/recipes/gry_online_pl.recipe index 4b9282bdd3..2993cb0043 100644 --- a/recipes/gry_online_pl.recipe +++ b/recipes/gry_online_pl.recipe @@ -1,20 +1,23 @@ +import time from calibre.web.feeds.recipes import BasicNewsRecipe class GryOnlinePl(BasicNewsRecipe): title = u'Gry-Online.pl' __author__ = 'fenuks' - description = 'Gry-Online.pl - computer games' + description = u'Wiadomości o grach, recenzje, zapowiedzi. Encyklopedia Gier zawiera opisy gier na PC, konsole Xbox360, PS3 i inne platformy.' category = 'games' language = 'pl' oldest_article = 13 - INDEX= 'http://www.gry-online.pl/' - masthead_url='http://www.gry-online.pl/im/gry-online-logo.png' - cover_url='http://www.gry-online.pl/im/gry-online-logo.png' + INDEX = 'http://www.gry-online.pl/' + masthead_url = 'http://www.gry-online.pl/im/gry-online-logo.png' + cover_url = 'http://www.gry-online.pl/im/gry-online-logo.png' max_articles_per_feed = 100 - no_stylesheets= True - keep_only_tags=[dict(name='div', attrs={'class':['gc660', 'gc660 S013']})] - remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})] - feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] + no_stylesheets = True + keep_only_tags = [dict(name='div', attrs={'class':['gc660', 'gc660 S013', 'news_endpage_tit', 'news_container', 'news']})] + remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})] + feeds = [ + (u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), + ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] def append_page(self, soup, appendtag): @@ -24,7 +27,14 @@ class GryOnlinePl(BasicNewsRecipe): url_part = soup.find('link', attrs={'rel':'canonical'})['href'] url_part = url_part[25:].rpartition('?')[0] for nexturl in nexturls[1:-1]: - soup2 = self.index_to_soup('http://www.gry-online.pl/' + url_part + nexturl['href']) + finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href'] + for i in range(10): + try: + soup2 = self.index_to_soup(finalurl) + break + except: + print 'retrying in 0.5s' + time.sleep(0.5) pagetext = soup2.find(attrs={'class':'gc660'}) for r in pagetext.findAll(name='header'): r.extract() @@ -34,7 +44,42 @@ class GryOnlinePl(BasicNewsRecipe): appendtag.insert(pos, pagetext) for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry']}): r.extract() + else: + tag = appendtag.find('div', attrs={'class':'S018stronyr'}) + if tag: + nexturl = tag.a + url_part = soup.find('link', attrs={'rel':'canonical'})['href'] + url_part = url_part[25:].rpartition('?')[0] + while tag: + end = tag.find(attrs={'class':'right left-dead'}) + if end: + break + else: + nexturl = tag.a + finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href'] + for i in range(10): + try: + soup2 = self.index_to_soup(finalurl) + break + except: + print 'retrying in 0.5s' + time.sleep(0.5) + tag = soup2.find('div', attrs={'class':'S018stronyr'}) + pagetext = soup2.find(attrs={'class':'gc660'}) + for r in pagetext.findAll(name='header'): + r.extract() + for r in pagetext.findAll(attrs={'itemprop':'description'}): + r.extract() + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony']}): + r.extract() + def image_url_processor(self, baseurl, url): + if url.startswith('..'): + return url[2:] + else: + return url def preprocess_html(self, soup): self.append_page(soup, soup.body) diff --git a/recipes/icons/emuzica_pl.png b/recipes/icons/emuzica_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..f708208bd2e0d64a96d69b22a66f5984d2bcd7a7 GIT binary patch literal 760 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)Mp*w z6XN>!Vc_rkzMpUU{JP_n9BX`PtILmDZu2JCgasL_o@G~&XQr>vu%V?Yg6rqmGfW zmbZug=PQm|7h20JDJdu^IXP(G+;9KoitWa^mdUY3)7#BA&9(e+(N;}ERb5k6QCZp2 zLgVc@n@<-lg8U6$p0wCH-^|%TJJ?@u(L_@fHPwlY#vjfZ7iJimn(Hl|VDkK!aeBO- zm%C13x_(-m-n-L!yB6uIYiVie=~d(!HkRl)*lBH>ub-Eq`}(-X!vh+DKDq%u`X|?D zU*E1dqfOh=QtQMT&B={AFOMi6U9RfxqM@v=uB)eIV5Gfgf%^Tu%FhqVSy`$nsj8}K zXsBsvY3gX-+okYypX9xrQp##-YMPqr+S>PbNgS=VqOPu?t(_3A@oYXmo1bUq`CRF?C+W&#d1;ZB2_HK> zotO@4E>c|LbNL8W`EgNU-$pa!+H;SSp|bHm46oZ0sW#{;u=wsl30>z zm0Xkxq!^4042^XSfXFPwz`)Ab!phiK+rZGuz`#z!Q3pjsZhlH;S|z#$b1Op=hz83) Sv)=(VFnGH9xvX7C_B)&eQY`6?zK#qG8~eHcB(ehejKx9j zP7LeL$-D$|6p}rHd>I(3)EF2VS{N990fib~Fff!FFfhDIU|_JC!N4G1FlSew4N!tD z$=luKKNxUEe80oMz{KL|;uvCa`s`ItkI+Dw;~&r0dS$ioPPy1HNzl`Yd5T+Zml?10 zV%LjXJY(2fU71C7o1W!pi@Kz)7F&2lElcs@B9%|Bf!$pv)J#~6eeUHJuJdX=Uy=7K zci;5G@8|u0|L45zbGNf+k9@o(VR2b(N5WFWm%*tI_APF_zKxYHWm}G7SKHi`4VQx_ z9b!KoRuOPHp2bXue~pE~BxCyptFkke90=)nw|vzBXH|jaM%xdrI}$$0;G9^>z19Ps zw#t@4&i?kyi|z*MPBB|w#VHzEIyFFI`ZC2h73N7RpQdt8@QRWb?UhrA^t>Z<{lJgT z)T0|xKJ8o^ARDjMF!SQC=n4O`4smr%SAF0Ad->jzLJ0wX_cfmTA@{^qdk&kiT<#3+ ztDE)mn_7?iFIo8H@1LX%&%gUUlS>eJI$hMv@!pk1Zzn{4UM&CRsaf*BcxBc@ofdOe zwKtaU?Y(gISB20XtLj4Knmtv|1$eg9>a%C^N<5u?LM*|cGIfi5)$4mDj>n%R&;9W3 zXK&Su?EeK^I;E_8B)H~HZe!+A^FLnw@b$WFc|Z32a%5j|w&jq!(0zwIR(FmI*Y%nf z6`g-`c*>dA8@BFKIp@4nOQA@E<+TMbHSNfM7nBCi$ zZ{B~Yaq>w%yS;K2)v*ahzyEFyGCk>3#&++n;Ek)Fp4&gze^FoIaLcXL69P4XDMYoz zHKHUXu_Vp=94Tr9Am;*I1c)I$ztaD0e0sx1`L5KhV literal 0 HcmV?d00001 diff --git a/recipes/icons/nowa_fantastyka.png b/recipes/icons/nowa_fantastyka.png new file mode 100644 index 0000000000000000000000000000000000000000..5c71a5c892dc58fe135e7479445934ec9161e453 GIT binary patch literal 1747 zcmV;^1}yoBP)Px#AY({UO#lFTCIA3{ga82g0001h=l}q9FaQARU;qF* zm;eA5aGbhPJOBUy24YJ`MF0Q)|Ns99Sohoj00rGiL_t(I%O%bIS5svi0Pyc~?`7L% zw2k+~coD`x6vZ4bfntD2ike!!bOH}Yz@sG|3$?6#$wQ>cDKn*)qar4zo)BL^Lr@G1 zP?&(CB#1DD$;-wryKCFs?!8Yx_z!$OXyrPe*r_Qv9#+7=@D17xQ-Fcl;Erp+1AJiy zJ_bDGp%qXC0@&atq>5+pCi(~EBL(f16}~!j1MKoRBp(;FV!xOsCP5ou*G5}`-ILof z(59bBK^H6oKiCW!V4)m;2IH^+@?ioMf(itDALNh$N5KjO&?Nqa4Rnuhk9U|RKEtrk zIxTbd^5}3)l%^@7N>QlXqewwVQ7bCPhj12zw`r@@*A`H?2v!2X+wch2U_FrV6upGQ zC<#hnADFNOnqd_B#9g8e?C1#^rWN5Op~s>&zSW)p%TFxtUv^IVvvgcqCa%K1_y$Tu zg($%c%mQY=`D<%3n@7Q?kb^U%)9Ljx9s`_#%g%$g0>>=e2$Qg07UBMo?vbBS?NP|B zJ1pnfH{2RLLgYDd&2cTU{&WR>o({g=clCJnR9)7c@;eRa8h=4N!CW1EFj_TWYAtR% zLWQNx-5m3;FOKq#Bt@N&CA919ZQb`rm|qO?m!Hhpv|PL7gmVOfAsPUVLqHxKU>gR+@^>u zc_%aNgYlRbr9(<0O$-+`VzO(R%T9!l)x=Nm0`zd`Rq!;ckL!yYxi$RIF#X=$8x{3i zjOM{W{Y6wr=!sPn_tcTgP2f(^h#yhlrk8xo(ME;I(0jO1)BF>Bp^8>af`v#yD>;KR zh?|2Xf=hS;sgMrouY%_BYsB^JODRYEOSeP4gL8TjO8yAn4*Ads`IMvEvD*Hnuv75h z^Iy~Y)O+4?UmFMk>_}vXqWf)Ow&6pvlza(Q=oa^YE9D-#-RJiUW$-=Jz}Z&;%y;Dp zT3j)CmQ5orl8AIb06GAR0pL1V39iU?+1`sMcnyDC3<4K;b5$yf;<0Q~;ISzRY9@!B z#b^V32|8yociefApCY95e$W6z(ErLOexY(n3~Nz5Vj&cAAP8^9X6Qws@Ek%3z66r7 znVO(gvL_NB`mF3VMn9ZtsORRB-ek07T|lA#Ka#Ix%~D_9UEC|w%Y3AtN$be($Xc>N zAjMqqJ=SQsYuOI>VJ1*e2c0krihzJ1@P`+`lObj@O{yg>_~-bX$8UL5x<|5p&(L!R zsh9|#jB?pjM#W|NTHr@h)s_S72%F*+M>J7wvTZ7vsM|5c&toO%ZJ3}f> z3V=3^U>! z70|)jEvL8TXoEA$7C+=;oR6J)OQvbR>6`H%^}p#819KxbMl2XQ^>k^!69qsKE<%xn znuv+$jx}i?aCI!Q+Kl~Uy0JHcBc?Y-R+`gCSC1_j9T@D>f7GM9t!rFHxpHkYZQJxM zopZa$;Px36Gvd96{I2@BTeHUQ7~hq}(qGX(24;qrt2JI<_^tL&5>5-zg7Dv7M#7Zf zVu;1r9`#B_b;elt{PUq=dA!1+cpPSral~}EnysC-OsXg0@A4+k$8LXz-+9xc-aFY# z$s4#|Ikmmadca!Xc2crcGS6+k!{+o)nek(gd|(!M6vnu zAnxjTkmvSX8X*PwE@i2r#NKELW9R-4%;D5@k4w=90000bbVXQnWMOn=I%9HWVRU5x zGB7bSEif}JFg8>$Fgi0hIx{mXFflqXFg_?jEC2uiC3HntbYx+4WjbwdWNBu305UK! pGc7PPEig7zFfckZF*-FdD=;xSFfihf%JcvL002ovPDHLkV1kxSEa3nE literal 0 HcmV?d00001 diff --git a/recipes/icons/tablety_pl.png b/recipes/icons/tablety_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..0c3a25813a7f13fad9c32692c49209bb3c87569c GIT binary patch literal 834 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5)E6J% z6XFU~ASWm1?&+9y`>FVlgV`Gz$n5d|z7!eVZoRX5CpPyGyn44D&RGF5M)zs9K znwl9C8wcb@gUQ&0Bu8iG^o)%E|Nk>GGB5%q7#LdG+QBr4Ff}!`u&|JllH%jzyK?2q z@#DukJ3FMMrLSMVzI*rXMT-^#JqPp%KLdjjACIk;n!BMvfR$x{v(xO^vo~(sIDPsI z25nP@Z4dd{76DCLym)bMZy$rE3B$@;JN6%BSaUkgMmJYgHkAcy7Oh%#?#P)#r`VolWn^W(e(~z1w1kx8=4KWPAD^>lbPpVu zB)sOxsawacox6AN;Kh?OxlcST+gMau{Qbk1Pv1U%eay)9gPnzqwY?>(wbk9l?IRQG zk3WmoCM7*?P_WZ$luv1KSQHvm#3iwWtBLn(xR>{NhGrkd4GkOW!^-$1o^(0HXw8}) z&%oB26tKNpT(sG-lO@C<$SW$!4H&3iV$W(uZ4zMuPzS0)t>=M2K^n_}OYeY#( zVo9o1a#1RfVlXl=G}bi$BC`+!11l2?D^oLV14AnVgJzeaeiRM4`6-!cmAEy0l00P$ P)WG2B>gTe~DWM4f7{Vqm literal 0 HcmV?d00001 diff --git a/recipes/konflikty_zbrojne.recipe b/recipes/konflikty_zbrojne.recipe index 8add89db94..b29e7e243b 100644 --- a/recipes/konflikty_zbrojne.recipe +++ b/recipes/konflikty_zbrojne.recipe @@ -7,7 +7,7 @@ class Konflikty(BasicNewsRecipe): __author__ = 'fenuks' cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg' language = 'pl' - description ='military news' + description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.' category='military, history' oldest_article = 7 max_articles_per_feed = 100 diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe index d1caa85950..c5fba54ab0 100644 --- a/recipes/kosmonauta_pl.recipe +++ b/recipes/kosmonauta_pl.recipe @@ -7,7 +7,7 @@ class Kosmonauta(BasicNewsRecipe): description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.' category = 'astronomy' language = 'pl' - cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' + cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' no_stylesheets = True INDEX = 'http://www.kosmonauta.net' oldest_article = 7 @@ -24,6 +24,5 @@ class Kosmonauta(BasicNewsRecipe): href = a['href'] if not href.startswith('http'): a['href'] = self.INDEX + href - print '%%%%%%%%%%%%%%%%%%%%%%%%%', a['href'] return soup - \ No newline at end of file + diff --git a/recipes/lomza.recipe b/recipes/lomza.recipe index d7e224d13d..2c31271624 100644 --- a/recipes/lomza.recipe +++ b/recipes/lomza.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Lomza(BasicNewsRecipe): title = u'4Lomza' __author__ = 'fenuks' - description = u'4Łomża - regional site' + description = u'Regionalny portal. Najświeższe informacje z regionu, kulturalne, sportowe. Ogłoszenia, baza biznesu, forum.' cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg' language = 'pl' oldest_article = 15 diff --git a/recipes/mlody_technik_pl.recipe b/recipes/mlody_technik_pl.recipe index 1eaa08d23a..4622e73909 100644 --- a/recipes/mlody_technik_pl.recipe +++ b/recipes/mlody_technik_pl.recipe @@ -7,7 +7,7 @@ class Mlody_technik(BasicNewsRecipe): description = u'Młody technik' category = 'science' language = 'pl' - cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg' + #cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg' no_stylesheets = True preprocess_regexps = [(re.compile(r"

Podobne

", re.IGNORECASE), lambda m: '')] oldest_article = 7 @@ -18,10 +18,17 @@ class Mlody_technik(BasicNewsRecipe): remove_tags = [dict(attrs={'class':'st-related-posts'})] remove_tags_after = dict(attrs={'class':'entry-content clearfix'}) feeds = [(u'Wszystko', u'http://www.mt.com.pl/feed'), - (u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'), + #(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'), (u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'), (u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'), (u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'), (u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'), (u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'), (u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')] + + def get_cover_url(self): + soup = self.index_to_soup('http://www.mt.com.pl/') + tag = soup.find(attrs={'class':'xoxo'}) + if tag: + self.cover_url = tag.find('img')['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/niebezpiecznik.recipe b/recipes/niebezpiecznik.recipe index b33a0a3513..a582a85aef 100644 --- a/recipes/niebezpiecznik.recipe +++ b/recipes/niebezpiecznik.recipe @@ -9,8 +9,8 @@ class Niebezpiecznik_pl(BasicNewsRecipe): oldest_article = 8 max_articles_per_feed = 100 no_stylesheets = True - cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png' - remove_tags=[dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})] - keep_only_tags= [dict(name='div', attrs={'class':['title', 'entry']})] + cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png' + remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})] + keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})] feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'), ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')] diff --git a/recipes/nowa_fantastyka.recipe b/recipes/nowa_fantastyka.recipe index 7715b9826a..1808d54824 100644 --- a/recipes/nowa_fantastyka.recipe +++ b/recipes/nowa_fantastyka.recipe @@ -9,7 +9,7 @@ class Nowa_Fantastyka(BasicNewsRecipe): __modified_by__ = 'zaslav' language = 'pl' encoding='latin2' - description ='site for fantasy readers' + description = u'Strona dla miłośników fantastyki' category='fantasy' masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg' #extra_css='.tytul {font-size: 20px;}' #not working diff --git a/recipes/pc_foster.recipe b/recipes/pc_foster.recipe index ab8c2b66b1..64bb3d76ee 100644 --- a/recipes/pc_foster.recipe +++ b/recipes/pc_foster.recipe @@ -7,12 +7,12 @@ class PC_Foster(BasicNewsRecipe): description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.' category = 'IT' language = 'pl' - masthead_url='http://pcfoster.pl/public/images/logo.png' - cover_url= 'http://pcfoster.pl/public/images/logo.png' - no_stylesheets= True - remove_empty_feeds= True - keep_only_tags= [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})] - remove_tags=[dict(name='p', attrs={'class':'right'})] + masthead_url = 'http://pcfoster.pl/public/images/logo.png' + cover_url = 'http://pcfoster.pl/public/images/logo.png' + no_stylesheets = True + remove_empty_feeds = True + keep_only_tags = [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})] + remove_tags = [dict(name='p', attrs={'class':'right'})] feeds = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')] @@ -32,4 +32,4 @@ class PC_Foster(BasicNewsRecipe): def preprocess_html(self, soup): self.append_page(soup, soup.body) - return soup \ No newline at end of file + return soup diff --git a/recipes/polska_times.recipe b/recipes/polska_times.recipe index 21104f1299..6fd6734c8c 100644 --- a/recipes/polska_times.recipe +++ b/recipes/polska_times.recipe @@ -7,9 +7,11 @@ class PolskaTimes(BasicNewsRecipe): language = 'pl' masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17' oldest_article = 7 + encoding = 'iso-8859-2' max_articles_per_feed = 100 - remove_emty_feeds= True + remove_empty_feeds = True no_stylesheets = True + use_embedded_content = False ignore_duplicate_articles = {'title', 'url'} #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) diff --git a/recipes/spiders_web_pl.recipe b/recipes/spiders_web_pl.recipe index 00e3041a5c..e2f9e6834d 100644 --- a/recipes/spiders_web_pl.recipe +++ b/recipes/spiders_web_pl.recipe @@ -4,7 +4,7 @@ class SpidersWeb(BasicNewsRecipe): title = u"Spider's Web" oldest_article = 7 __author__ = 'fenuks' - description = u'Opinie i analizy na temat technologii' + description = u'Autorskie teksty popularnych blogerów, testy sprzętu i aplikacji, oraz wiele więcej.' cover_url = 'http://www.spidersweb.pl/wp-content/themes/new_sw/images/spidersweb.png' category = 'IT, WEB' language = 'pl' diff --git a/recipes/tablety_pl.recipe b/recipes/tablety_pl.recipe index 1c3f46f967..97a44f81c7 100644 --- a/recipes/tablety_pl.recipe +++ b/recipes/tablety_pl.recipe @@ -3,7 +3,7 @@ import re class Tablety_pl(BasicNewsRecipe): title = u'Tablety.pl' __author__ = 'fenuks' - description = u'tablety.pl - latest tablet news' + description = u'Tablety, gry i aplikacje na tablety.' masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png' cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png' category = 'IT' diff --git a/recipes/tanuki.recipe b/recipes/tanuki.recipe index a615763307..6f37c17e7c 100644 --- a/recipes/tanuki.recipe +++ b/recipes/tanuki.recipe @@ -4,6 +4,7 @@ class tanuki(BasicNewsRecipe): title = u'Tanuki' oldest_article = 7 __author__ = 'fenuks' + description = u'Tanuki - portal o anime i mandze.' category = 'anime, manga' language = 'pl' max_articles_per_feed = 100 @@ -42,4 +43,4 @@ class tanuki(BasicNewsRecipe): a['href']='http://manga.tanuki.pl' + a['href'] elif 'tanuki-czytelnia' in soup.title.string.lower(): a['href']='http://czytelnia.tanuki.pl' + a['href'] - return soup \ No newline at end of file + return soup diff --git a/recipes/tvn24.recipe b/recipes/tvn24.recipe index ed0eae574f..22647e9e02 100644 --- a/recipes/tvn24.recipe +++ b/recipes/tvn24.recipe @@ -8,8 +8,8 @@ class tvn24(BasicNewsRecipe): description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata' category = 'news' language = 'pl' - masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' - cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' + #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif' + cover_url= 'http://www.qzdrowiu.pl/Upload/KnowQZdrowiu_PressOffice/TVN24_logo_575702b7-edce-4b6f-a41b-4395f9456f96_ff6d6ccf-528a-4b94-9e61-2fed727aba35.png' extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}' remove_empty_feeds = True remove_javascript = True diff --git a/recipes/ubuntu_pl.recipe b/recipes/ubuntu_pl.recipe index 84912e44fa..4d2340ad84 100644 --- a/recipes/ubuntu_pl.recipe +++ b/recipes/ubuntu_pl.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Ubuntu_pl(BasicNewsRecipe): title = u'UBUNTU.pl' __author__ = 'fenuks' - description = 'UBUNTU.pl - polish ubuntu community site' + description = 'Polskie forum użytkowników Ubuntu Linux. Projekty, porady i dyskusje, gotowe rozwiązania problemów.' masthead_url= 'http://ubuntu.pl/img/logo.jpg' cover_url = 'http://ubuntu.pl/img/logo.jpg' category = 'linux, IT' From 66a13d29e3d30caa35479be0d4fe77afe0060d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Wed, 6 Mar 2013 20:39:17 +0100 Subject: [PATCH 4/4] new newspapers by fenuks --- recipes/dziennik_baltycki.recipe | 34 +++++++++++ recipes/dziennik_lodzki.recipe | 35 +++++++++++ recipes/dziennik_wschodni.recipe | 78 +++++++++++++++++++++++++ recipes/dziennik_zachodni.recipe | 34 +++++++++++ recipes/echo_dnia.recipe | 74 ++++++++++++++++++++++++ recipes/gazeta_krakowska.recipe | 34 +++++++++++ recipes/gazeta_lubuska.recipe | 64 +++++++++++++++++++++ recipes/gazeta_wroclawska.recipe | 34 +++++++++++ recipes/gazeta_wspolczesna.recipe | 63 ++++++++++++++++++++ recipes/gcn.recipe | 83 +++++++++++++++++++++++++++ recipes/glos_wielkopolski.recipe | 34 +++++++++++ recipes/icons/dziennik_baltycki.png | Bin 0 -> 865 bytes recipes/icons/dziennik_lodzki.png | Bin 0 -> 461 bytes recipes/icons/dziennik_wschodni.png | Bin 0 -> 414 bytes recipes/icons/dziennik_zachodni.png | Bin 0 -> 431 bytes recipes/icons/echo_dnia.png | Bin 0 -> 1163 bytes recipes/icons/gazeta_krakowska.png | Bin 0 -> 398 bytes recipes/icons/gazeta_lubuska.png | Bin 0 -> 1087 bytes recipes/icons/gazeta_wroclawska.png | Bin 0 -> 470 bytes recipes/icons/gazeta_wspolczesna.png | Bin 0 -> 921 bytes recipes/icons/gcn.png | Bin 0 -> 554 bytes recipes/icons/glos_wielkopolski.png | Bin 0 -> 446 bytes recipes/icons/kurier_lubelski.png | Bin 0 -> 483 bytes recipes/icons/kurier_poranny.png | Bin 0 -> 354 bytes recipes/icons/kurier_szczecinski.png | Bin 0 -> 1175 bytes recipes/icons/nto.png | Bin 0 -> 416 bytes recipes/icons/trojmiasto_pl.png | Bin 0 -> 537 bytes recipes/kurier_lubelski.recipe | 34 +++++++++++ recipes/kurier_poranny.recipe | 78 +++++++++++++++++++++++++ recipes/kurier_szczecinski.recipe | 27 +++++++++ recipes/nto.recipe | 63 ++++++++++++++++++++ recipes/trojmiasto_pl.recipe | 37 ++++++++++++ 32 files changed, 806 insertions(+) create mode 100644 recipes/dziennik_baltycki.recipe create mode 100644 recipes/dziennik_lodzki.recipe create mode 100644 recipes/dziennik_wschodni.recipe create mode 100644 recipes/dziennik_zachodni.recipe create mode 100644 recipes/echo_dnia.recipe create mode 100644 recipes/gazeta_krakowska.recipe create mode 100644 recipes/gazeta_lubuska.recipe create mode 100644 recipes/gazeta_wroclawska.recipe create mode 100644 recipes/gazeta_wspolczesna.recipe create mode 100644 recipes/gcn.recipe create mode 100644 recipes/glos_wielkopolski.recipe create mode 100644 recipes/icons/dziennik_baltycki.png create mode 100644 recipes/icons/dziennik_lodzki.png create mode 100644 recipes/icons/dziennik_wschodni.png create mode 100644 recipes/icons/dziennik_zachodni.png create mode 100644 recipes/icons/echo_dnia.png create mode 100644 recipes/icons/gazeta_krakowska.png create mode 100644 recipes/icons/gazeta_lubuska.png create mode 100644 recipes/icons/gazeta_wroclawska.png create mode 100644 recipes/icons/gazeta_wspolczesna.png create mode 100644 recipes/icons/gcn.png create mode 100644 recipes/icons/glos_wielkopolski.png create mode 100644 recipes/icons/kurier_lubelski.png create mode 100644 recipes/icons/kurier_poranny.png create mode 100644 recipes/icons/kurier_szczecinski.png create mode 100644 recipes/icons/nto.png create mode 100644 recipes/icons/trojmiasto_pl.png create mode 100644 recipes/kurier_lubelski.recipe create mode 100644 recipes/kurier_poranny.recipe create mode 100644 recipes/kurier_szczecinski.recipe create mode 100644 recipes/nto.recipe create mode 100644 recipes/trojmiasto_pl.recipe diff --git a/recipes/dziennik_baltycki.recipe b/recipes/dziennik_baltycki.recipe new file mode 100644 index 0000000000..3cbe3c0968 --- /dev/null +++ b/recipes/dziennik_baltycki.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class DziennikBaltycki(BasicNewsRecipe): + title = u'Dziennik Ba\u0142tycki' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Dziennik Bałtycki. Najnowsze Wiadomości Trójmiasto i Wiadomości Pomorskie. Czytaj!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/dziennikbaltycki.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds= True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] + + feeds = [(u'Wiadomo\u015bci', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_wiadomosci.xml?201302'), (u'Sport', u'http://dziennikbaltycki.feedsportal.com/c/32980/f/533756/index.rss?201302'), (u'Rejsy', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_rejsy.xml?201302'), (u'Biznes na Pomorzu', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_biznesnapomorzu.xml?201302'), (u'GOM', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_gom.xml?201302'), (u'Opinie', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_opinie.xml?201302'), (u'Pitawal Pomorski', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_pitawalpomorski.xml?201302')] + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/dziennik-baltycki/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) \ No newline at end of file diff --git a/recipes/dziennik_lodzki.recipe b/recipes/dziennik_lodzki.recipe new file mode 100644 index 0000000000..93a86fdaa2 --- /dev/null +++ b/recipes/dziennik_lodzki.recipe @@ -0,0 +1,35 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class DziennikLodzki(BasicNewsRecipe): + title = u'Dziennik \u0141\xf3dzki' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Dziennik Łódzki. Najnowsze Wiadomości Łódź. Czytaj Wiadomości Łódzkie!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/dzienniklodzki.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] + + feeds = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')] + + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/dziennik-lodzki/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/dziennik_wschodni.recipe b/recipes/dziennik_wschodni.recipe new file mode 100644 index 0000000000..b44bc3f639 --- /dev/null +++ b/recipes/dziennik_wschodni.recipe @@ -0,0 +1,78 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe +class DziennikWschodni(BasicNewsRecipe): + title = u'Dziennik Wschodni' + __author__ = 'fenuks' + description = u'Dziennik Wschodni - portal regionalny województwa lubelskiego.' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.dziennikwschodni.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + + feeds = [(u'Wszystkie', u'http://www.dziennikwschodni.pl/rss.xml'), + (u'Lublin', u'http://www.dziennikwschodni.pl/lublin.xml'), + (u'Zamość', u'http://www.dziennikwschodni.pl/zamosc.xml'), + (u'Biała Podlaska', u'http://www.dziennikwschodni.pl/biala_podlaska.xml'), + (u'Chełm', u'http://www.dziennikwschodni.pl/chelm.xml'), + (u'Kraśnik', u'http://www.dziennikwschodni.pl/krasnik.xml'), + (u'Puławy', u'http://www.dziennikwschodni.pl/pulawy.xml'), + (u'Świdnik', u'http://www.dziennikwschodni.pl/swidnik.xml'), + (u'Łęczna', u'http://www.dziennikwschodni.pl/leczna.xml'), + (u'Lubartów', u'http://www.dziennikwschodni.pl/lubartow.xml'), + (u'Sport', u'http://www.dziennikwschodni.pl/sport.xml'), + (u'Praca', u'http://www.dziennikwschodni.pl/praca.xml'), + (u'Dom', u'http://www.dziennikwschodni.pl/dom.xml'), + (u'Moto', u'http://www.dziennikwschodni.pl/moto.xml'), + (u'Zdrowie', u'http://www.dziennikwschodni.pl/zdrowie.xml'), + ] + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/dziennik_zachodni.recipe b/recipes/dziennik_zachodni.recipe new file mode 100644 index 0000000000..126c876937 --- /dev/null +++ b/recipes/dziennik_zachodni.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class DziennikZachodni(BasicNewsRecipe): + title = u'Dziennik Zachodni' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Dziennik Zachodni. Najnowsze Wiadomości Śląskie. Wiadomości Śląsk. Czytaj!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/dziennikzachodni.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds= True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}), dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'})] + + feeds = [(u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'), (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'), (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'), (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'), (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'), (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'), (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'), (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'), (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'), (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302')] + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/dziennik-zachodni/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/echo_dnia.recipe b/recipes/echo_dnia.recipe new file mode 100644 index 0000000000..c84ef1d21f --- /dev/null +++ b/recipes/echo_dnia.recipe @@ -0,0 +1,74 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class EchoDnia(BasicNewsRecipe): + title = u'Echo Dnia' + __author__ = 'fenuks' + description = u'Echo Dnia - portal regionalny świętokrzyskiego radomskiego i podkarpackiego. Najnowsze wiadomości z Twojego regionu, galerie, video, mp3.' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.echodnia.eu' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + feeds = [(u'Wszystkie', u'http://www.echodnia.eu/rss.xml'), + (u'Świętokrzyskie', u'http://www.echodnia.eu/swietokrzyskie.xml'), + (u'Radomskie', u'http://www.echodnia.eu/radomskie.xml'), + (u'Podkarpackie', u'http://www.echodnia.eu/podkarpackie.xml'), + (u'Sport \u015bwi\u0119tokrzyski', u'http://www.echodnia.eu/sport_swi.xml'), + (u'Sport radomski', u'http://www.echodnia.eu/sport_rad.xml'), + (u'Sport podkarpacki', u'http://www.echodnia.eu/sport_pod.xml'), + (u'Pi\u0142ka no\u017cna', u'http://www.echodnia.eu/pilka.xml'), + (u'Praca', u'http://www.echodnia.eu/praca.xml'), + (u'Dom', u'http://www.echodnia.eu/dom.xml'), + (u'Auto', u'http://www.echodnia.eu/auto.xml'), + (u'Zdrowie', u'http://www.echodnia.eu/zdrowie.xml')] + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/gazeta_krakowska.recipe b/recipes/gazeta_krakowska.recipe new file mode 100644 index 0000000000..3abbcfdf39 --- /dev/null +++ b/recipes/gazeta_krakowska.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class GazetaKrakowska(BasicNewsRecipe): + title = u'Gazeta Krakowska' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Gazeta Krakowska. Najnowsze Wiadomości Kraków. Informacje Kraków. Czytaj!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetakrakowska.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] + + feeds = [(u'Fakty24', u'http://gazetakrakowska.feedsportal.com/c/32980/f/533770/index.rss?201302'), (u'Krak\xf3w', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_krakow.xml?201302'), (u'Tarn\xf3w', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_tarnow.xml?201302'), (u'Nowy S\u0105cz', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_nsacz.xml?201302'), (u'Ma\u0142. Zach.', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_malzach.xml?201302'), (u'Podhale', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_podhale.xml?201302'), (u'Sport', u'http://gazetakrakowska.feedsportal.com/c/32980/f/533771/index.rss?201302'), (u'Kultura', u'http://gazetakrakowska.feedsportal.com/c/32980/f/533772/index.rss?201302'), (u'Opinie', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_opinie.xml?201302'), (u'Magazyn', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_magazyn.xml?201302')] + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/gazeta-krakowska/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/gazeta_lubuska.recipe b/recipes/gazeta_lubuska.recipe new file mode 100644 index 0000000000..f14c0fcce2 --- /dev/null +++ b/recipes/gazeta_lubuska.recipe @@ -0,0 +1,64 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class GazetaLubuska(BasicNewsRecipe): + title = u'Gazeta Lubuska' + __author__ = 'fenuks' + description = u'Gazeta Lubuska - portal regionalny województwa lubuskiego.' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.gazetalubuska.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + feeds = [(u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'), (u'Dreznenko', u'http://www.gazetalubuska.pl/drezdenko.xml'), (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'), (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'), (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'), (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'), (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'), (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'), (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'), (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'), (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'), (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'), (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'), (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'), (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'), (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'), (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'), (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'), (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'), (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'), (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'), (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'), (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'), (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'), (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'), (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml')] + + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/gazeta_wroclawska.recipe b/recipes/gazeta_wroclawska.recipe new file mode 100644 index 0000000000..5bcb5654c0 --- /dev/null +++ b/recipes/gazeta_wroclawska.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class GazetaWroclawska(BasicNewsRecipe): + title = u'Gazeta Wroc\u0142awska' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Gazeta Wrocławska. Najnowsze Wiadomości Wrocław, Informacje Wrocław. Czytaj!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetawroclawska.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] + + feeds = [(u'Fakty24', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533775/index.rss?201302'), (u'Region', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_region.xml?201302'), (u'Kultura', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533777/index.rss?201302'), (u'Sport', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533776/index.rss?201302'), (u'Z archiwum', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_zarchiwum.xml?201302'), (u'M\xf3j reporter', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_mojreporter.xml?201302'), (u'Historia', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_historia.xml?201302'), (u'Listy do redakcji', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_listydoredakcji.xml?201302'), (u'Na drogach', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_nadrogach.xml?201302')] + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/gazeta-wroclawska/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/gazeta_wspolczesna.recipe b/recipes/gazeta_wspolczesna.recipe new file mode 100644 index 0000000000..cfa70d4e2b --- /dev/null +++ b/recipes/gazeta_wspolczesna.recipe @@ -0,0 +1,63 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class GazetaWspolczesna(BasicNewsRecipe): + title = u'Gazeta Wsp\xf3\u0142czesna' + __author__ = 'fenuks' + description = u'Gazeta Współczesna - portal regionalny.' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.wspolczesna.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + feeds = [(u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'), (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'), (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'), (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'), (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'), (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'), (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'), (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'), (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'), (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'), (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'), (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'), (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'), (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'), (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'), (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'), (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'), (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'), (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'), (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'), (u'Sport', u'http://www.wspolczesna.pl/sport.xml'), (u'Praca', u'http://www.wspolczesna.pl/praca.xml'), (u'Dom', u'http://www.wspolczesna.pl/dom.xml'), (u'Auto', u'http://www.wspolczesna.pl/auto.xml'), (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml')] + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/gcn.recipe b/recipes/gcn.recipe new file mode 100644 index 0000000000..3e4a3f365f --- /dev/null +++ b/recipes/gcn.recipe @@ -0,0 +1,83 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class GCN(BasicNewsRecipe): + title = u'Gazeta Codziennej Nowiny' + __author__ = 'fenuks' + description = u'nowiny24.pl - portal regionalny województwa podkarpackiego.' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.nowiny24.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + feeds = [(u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'), + (u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'), + (u'Bieszczady', u'http://www.nowiny24.pl/bieszczady.xml'), + (u'Rzeszów', u'http://www.nowiny24.pl/rzeszow.xml'), + (u'Przemyśl', u'http://www.nowiny24.pl/przemysl.xml'), + (u'Leżajsk', u'http://www.nowiny24.pl/lezajsk.xml'), + (u'Łańcut', u'http://www.nowiny24.pl/lancut.xml'), + (u'Dębica', u'http://www.nowiny24.pl/debica.xml'), + (u'Jarosław', u'http://www.nowiny24.pl/jaroslaw.xml'), + (u'Krosno', u'http://www.nowiny24.pl/krosno.xml'), + (u'Mielec', u'http://www.nowiny24.pl/mielec.xml'), + (u'Nisko', u'http://www.nowiny24.pl/nisko.xml'), + (u'Sanok', u'http://www.nowiny24.pl/sanok.xml'), + (u'Stalowa Wola', u'http://www.nowiny24.pl/stalowawola.xml'), + (u'Tarnobrzeg', u'http://www.nowiny24.pl/tarnobrzeg.xml'), + (u'Sport', u'http://www.nowiny24.pl/sport.xml'), + (u'Dom', u'http://www.nowiny24.pl/dom.xml'), + (u'Auto', u'http://www.nowiny24.pl/auto.xml'), + (u'Praca', u'http://www.nowiny24.pl/praca.xml'), + (u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'), + (u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml')] + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/glos_wielkopolski.recipe b/recipes/glos_wielkopolski.recipe new file mode 100644 index 0000000000..d7706c4173 --- /dev/null +++ b/recipes/glos_wielkopolski.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class GlosWielkopolski(BasicNewsRecipe): + title = u'G\u0142os Wielkopolski' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Głos Wielkopolski. Najnowsze Wiadomości Poznań. Czytaj Informacje Poznań!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gloswielkopolski.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds= True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] + + feeds = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')] + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/glos-wielkopolski/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/icons/dziennik_baltycki.png b/recipes/icons/dziennik_baltycki.png new file mode 100644 index 0000000000000000000000000000000000000000..6572da8a34f9633eee6528760c76882485f4157d GIT binary patch literal 865 zcmZ`&ZA?>V6g~w?h+lIveuiN;Cw`*0H8Y{%$7*PRY-p`yF~*n{TIy)aMOuQoKW^EU z=n}?sZa8sq>HxvP=0~8V7=j@3F{=eiTW;HX`?0pvf!y1BTNtc+`MDoU&N)w>bIy}D zZ_YUvWrqt^uh_Bz09F?j=F1VTUG|Jr6nEylaSTD4Mp7&R`2Cxei%LG?JcGR85VSwf zj36Q?E0h-loX-Zh@CCpEN?iy8)L;P93IK5fz{aXZ^U*w{Gs-?Hk;db3E*_7u9K&)f z$FVFIL(Xx@#YFO-`K5&f6Ju$VVp*DDQ2C)PSofG{c;X%QO#hwWmgYH{W@hJ@*EF*b zXW3Ycq3K|BR#Z0#+O8CBCulo))n>`nVe@z}5o40`Y%;-)dLmS02DQfMIh!ZUKVt?% zH^0V`T4UiEx;A9%0XOlC-Oucl z-ygI(DYMo6Iue?Vg#HW!Z}mC$U+RY{Gw4h_ooRyb`Fw#XH|6#E{FL8GkPd>31OgQ0 z>v-n;>Y?qk`!AdN9c~Zhobo!Rypt}E-RW_-JU)^P`=|g(*&W1$gBYJAY<6O7k{GkQ zUXDAH$z*W}rd%Fe>4ydRpB}M2T0~@|C|^>tX{<~kNWnzH0}ljU_jIo^8yim&d6o}) z{}>tR9klKc?0j$UySuaZ?R#g}z@Q*!+nyaAH(T!9yVcs-)c*6(&0l`K-PVjNPOEgK z73G!6A%$9BrqyUV@F*F+L+bPcq;Gm}C_7ZC#qnlMl#29`LH&$B8rn)`wcyQd>jDF7 zGvC^>AuH?6&FkM@7bUlDwAR-*eAjsSa{Z6r|Mcy(t2Y{Y>#tn@AvJBD|D{wqkju|q zwOuAV@$sosM^DK6WEaynwY~r_^yj57-W4{XZ*4b9j~i7=qgq^6twsPMOn4v%Lw2wP z%N4(WP%ISf!GvNAYrE33^dEs9SA9}`?*9d@=X`VquzC4~V|cl-R#~kEi3%@O3ySng Zjasf&s?MDmQlnJ|D3Tt|Z$G5?{9i(5!dU45bDP46hOx7_4S6Fo+k-*%fF5lt>8h z332^>cG>^`|GzdV{8*Xx=fUysQ=Ptb>HSz3@okdLpBp=WpO~vGuQ0K{zceG>-r8o@ z&Yd4We*ATy|HzDDLtVWS$B)mcjGH%i?&HUgA8qN43XhnP;nZALDh|8TB2Tyo8SMRcF-wC zZr6+bf378^X>`SIdd02~8YaniAn5T9W=_vGcIGbGa!vQ4TlM>P z9#sdsBR@DxBz`?Py|*?>+U?Qd8Ry%3r|!7%;duPET1HV{mId;!r^W;ARxNRjC`m~y zNwrEYN(E93Mh1q)x&}aG7Ghv&Wom9^Y^-fyXk}oqfcX|E9uOLG^HVa@DsgML7?-pS PsDZ)L)z4*}Q$iB}ao(*c literal 0 HcmV?d00001 diff --git a/recipes/icons/dziennik_wschodni.png b/recipes/icons/dziennik_wschodni.png new file mode 100644 index 0000000000000000000000000000000000000000..10c0b4fb58f6883dcc9e4536d6e58313ee9124f6 GIT binary patch literal 414 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPF=ES4z)+>ez|hdb!0-zw)bN6Vq11qZ;Z*_ygVhWM2JwP9y8>;15(WW2 zA+A9BU0V5F2EqS#@BRmZ-Dlp-TyAf83^R0c)B=-a9mGLaA0!s@=Cbn(a`8BdL)1$ zX&DO}TiAvI$q8(o1_q3Z9SaW}S-`?1HA{xaE7HryOUmmFkL%45bDP46hOx7_4S6Fo+k-*%fF5l<*7i z32~jVa`X4gHy;LMysYW__2}9E|Np-pJ#$^m^uzMapSB%%-8JjJQ}naE#y=lEZP>E+ z#{EZkjQv)$Oy0Er*p`f%r!U`JG7H#oEak7ak=#TO|b?6k=BQ68X(zm0Xkxq!^4042^XSfXFPwz|zXp(#qIO+rZGu kz~IfNqjD%3a`RI%(<*Umn7h7q5l{nzr>mdKI;Vst0M1>dvH$=8 literal 0 HcmV?d00001 diff --git a/recipes/icons/echo_dnia.png b/recipes/icons/echo_dnia.png new file mode 100644 index 0000000000000000000000000000000000000000..a7454b99647772215e7d2ff0393059daccf1e0ae GIT binary patch literal 1163 zcmZ`&c~Db#5dI;8C=YETDjqmOM{Gqjfr4YIorc2zB@q%!5i4>e;TEMx1+fJM3WADI zDKJ2bRRjw-DAK_ph*(i51UbTygEKsHy%!P^AeoTY$DjS9JG=Ysx3k|rJG=RTbgIp! zU7G*^Hh#X|3>-gN3oCOxf9kS@iNpG63XK9l^BrqrgayvczGqN9K@)F}3@7`dd>J$V z(wzXv%m%=Kw=$&wTqXh_4+nr;2EdMsl`n#h1F$YFFyOQgK5&qW**-web}to)vCI*J zKX>ty2G#Cbgx0=z^tsl)UbW}3%Dx`8uTvfAz$|}9^7{~zUaM217>4O}>TLx*;I>%0 zY%J*-2RVFDBnAaSP$=42uL^&sS1F_i^0*fHJZinL^yQ#+9*^EC!&X!$%U+t@odQLo z&+Ddb%f+^}=+$9^0!4!wrmOg9#<&S zk12IZm?scv7d5>jW0*nLC!C#EFAt85Yc&gh4T?-AV?nPBR4dE)s`(Fa@Nch&reTM? zmjO*;h5W0mw$Ct3-ImP~a?>9fv4rsB^ zKSxsEA4d6+-t=nk_hSYI zvd*99L`5bg#K&=1QWz%GF03ycw7xrX@Q+D)lPJAy?2S-@AjqOdqgJa`Ds;^dM4?ca zOeWvASZvR7tpt;-VhJ}U2>9`tr7-AB@D)7HStF5OQ@w4Ar+VD++RD$Ha@uY#lotIo^+)B@{D;T%RUGkY>|O|K5X->Jh`dAqR?JPetajY#(Z;_Vc^Jf1;2x8A~yY~&>{rP2`%`IBL3j{SLBKB41 z#3#7dR<}CQEBZQ#Y45bDP46hOx7_4S6Fo+k-*%fF5lrRqP z331)H^TPlC|37{CdFJwqoA=)J%{^@zxOmoziy5`M8Ydsyy!Vb%%*yDZEze$l_Df&C zZ1eS!jspf$f1d_wXH4>TcVYa`qFRasImQP_8JvwnC}Q!>*kaclUqG{*s`fx*+&&t;ucLK6TO;g16V literal 0 HcmV?d00001 diff --git a/recipes/icons/gazeta_lubuska.png b/recipes/icons/gazeta_lubuska.png new file mode 100644 index 0000000000000000000000000000000000000000..6f2c30992e60ba3d94f8a6d85edc21d4a63fbc46 GIT binary patch literal 1087 zcmZ{iYfO^|6owD9+`j@M%BFKe1SUF6O9P2jmqECXcHAtiiA#}!VGicFxM-A!6BbY> zpkgJ$rrRKPP>}&{NTFPW0j1@#a_LuEOKEK>1zJ|5<a&aylzgy0|qCm}e?fq(+2?v`7vR+H+r(8Fuk-bsLW6}Wi{-1i6w zy9J1cz-5=f!Bt>KklPTXfH#f|E7^C^yex5Kq^9o4G>!IlQLV18(f4=i`r4O=`ZNtc zOPOgX*vx{RVel{Iy^4)>la^b*HZL0|2QPJZ&ELGKALvq6-C64GT$vQADk>xZ5*%*1 zr%Tt>wkk(;FI!D&`BZeQs;Xipm2MD=X6PxZ2lo~S_!>?<3by7J?p#Tcabi^4SZ7|; z>e{#)>;LGx|1eI9W;0m&UY==QZX6$x;R#l)X4E%8QTE*wBUPN5Ag0Hoh57RPw`R_> zWMxHD2~^3qIbwRkgmiKIQwZ-xM!t&Fc^c4nDEf#cP7%Ae7F@S_^9Cmzn{++b`QQ9=^Y*r;cnPo zN06`{pC@PMJSr2%gPL=V&InQ#J9LEe<-)#dhJ@07*@7VUC6WF9b(!kDRrJY_wu=a| z=K2Foc#NB~^nUFZvX46vq;T8ErYN-g;NvvKu8M>)1R*{D#^6*MGZ^qROZ)D_&k?)?YWO2r^Ugo1t!96>PTQ?w*CNu)SQ_;#k;z-+n-i_GGn1diy-7o zVfCqVOC4sHy~h|0t*iiCZ?ozsoy*{5>f25GLvkb?#YJhlHvNfrtN1e*)QGv$8TT{eNJx#I6<#KuiQJoEyfx{x2gk5*q*j literal 0 HcmV?d00001 diff --git a/recipes/icons/gazeta_wroclawska.png b/recipes/icons/gazeta_wroclawska.png new file mode 100644 index 0000000000000000000000000000000000000000..6003bd2823820ee8a117e7e6c94e39bf18e04a87 GIT binary patch literal 470 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5lt>Qn z32}Y$Zqxt&|F`bh);YC%#-f>zo;*5o>ZqZciHNrN%oRd;L?)@|AfH6 zETy$44Yyx5u9~7LXC`^>VXZ+92Q|4h2~fSfW<7sn8Z%c=dhnVJ*?Ty}Q&dc6qCSQhsG|I|ZD zX_fcB*_=7URp{_EI!T?bBvhcME-ZxlFHn z-iw2?Tu;b^ee_wlu*65H)k`$mdF}j98)lY2{CaPh^xHe>_x^9Hf7j0{eT~h>|D200 z&;_a`t`Q|Ei6yC4$wjF^iowXh&{)?1h|EF^4Xg}|tqcvc4GgUe3^u+{@Ild#o1c=I YR*74~fvqz>05vdpy85}Sb4q9e0GB|oHvj+t literal 0 HcmV?d00001 diff --git a/recipes/icons/gazeta_wspolczesna.png b/recipes/icons/gazeta_wspolczesna.png new file mode 100644 index 0000000000000000000000000000000000000000..c21b6fa4140313ac07f23c1ee3bb71e65eb29ba5 GIT binary patch literal 921 zcmZ`#dr(YK7(YAFkV_iR@qVVZEq5PQd#$wD+Ctab(qK%=);1YpDn^SY6r!QWzT3*u zCa?6EVdOC$vnKus!bN5o*f&bhl7`OcsIFmvxc_xsNG{a%%NUN<~sLI^@=cuKO? z0Ov^h4he#9M098x9E0*SsTzbjjtu*qJs4_HkwKS$+IlCwhhmyB*^r8mIUJ#~a)f?D ztE?ZPtt>(xvJm1N2#wp~?p~pWy#oER^h6jS9vx{~Oq{XAr6NbQLW>^PMhYia5>FDg zD@Z*@np6E9Cc(=Gu9QNF?Q+r>PuyzKun=44VoMBm%oAF8p*0=bq~w?mK(Hm6I25E| z5d}Q40-&HTG%v%>`Pd@CP9^+UgDc{Wqkw=` z&w&P*hdw2$S5bDm9D_m-b1WJ3Q{JWo>Zl=(iU)ich8dfT0Ha;z@5&d>W@0N#k)89Y zW>;+B$}Vaac!)(h*N;LTK3B8<^XYX5n*FzZ8@uej3;v=} z%1f{M9^d@v-;nw1-J6^*qaDxsak;X3kkQ^$F(+^Pu8Ctxr|&F$7z(_i^-Fcy63^3z z_nU0xxa5xb)P@feTx*_iOt;U>{Pg;Q_a)s5Z!rblGGlXX zOvWSS`E6u`>M-BSFfppOg3nVL6EmO3T`m11Vwl~zgDW(BD%JIh=^kJ9tN65;WxWxH zst)#6R-Qc&w0cxiwZr1Bb=ewgoYvH;<)Xf}Ihr3suZh9I?Bc{##W~r0!LIhH-mijMrVuxWEnzNzH@3r{v(l*0l_N#wO{30Zso0zK)f AX8-^I literal 0 HcmV?d00001 diff --git a/recipes/icons/gcn.png b/recipes/icons/gcn.png new file mode 100644 index 0000000000000000000000000000000000000000..287488590419379433811d938dd1ba9bd96749cb GIT binary patch literal 554 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5l-L;H z6XFV_ADVgIS1`PC#j~MtLF$h0mkbKM_`aoL$o}tVA#DYgGKq-lb zyef|w#U3k}KLQ%Xsq~mt?w+R2V=f>=_O_ApZ9}K0GRBXYrEeQJJZ2Dl#31^ZL*bc< z)e`}=CxRMJM6~Z%c|YY-ej=#;R6-BvTHCq5<$)AqNswPKgTu2MX&_FLx4R2N2dk_H zkdyD};us=vIXOXr-N49jvw^{eO&g0HSS@z$sI2(8;|D8Cs{+#@rA0}PgeC0=rS*gkDI-_BV(1f6{z<^fn6^xptnXJ4!8q_9SdgLa2LCw*@y)|Ck45bDP46hOx7_4S6Fo+k-*%fF5l!yxO z32~kB`q%&e|F2!WT2@>V6&|r>^=d<1J!N@?iT(X2jvs&g_;E&Rn!UA6?v&Xb6DHM9 zoLV+{2E(HL`(M28zxLQ*%V~vm#{>3XuD@{KVdwezcb?z>{C)kS*9E6<3#>To;phGZ zXbWSKx4R4DeY)jn(UyhfJCS7%b zbCyK8nLjxyUOnOVp2*hw{Q_R!G(#Q+ojzCiRw{4qM+cS?TQ(_2eV-ZPjy_zUcuqu$ zn%OhWW;(g0`yxBL-@%`a3%G?k*|x7;;p_mkQnkc2q9i4;B-JXpC>2OC7#SEE>ly%& yS%{&5m4T_1vAMQ^p_PGwbxGzA6b-rgDVb@NxHWibUUUI!VDNPHb6Mw<&;$Uyj-g@z literal 0 HcmV?d00001 diff --git a/recipes/icons/kurier_lubelski.png b/recipes/icons/kurier_lubelski.png new file mode 100644 index 0000000000000000000000000000000000000000..a7d1a69752216592e482f6d918c64774e2ecac7c GIT binary patch literal 483 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5lt>Hk z32{CD=;QzY{~dac%|H6+$EvlTLZiO|HKuLu-*fL<@5Hw3h8Y`inntl|l2tDHc&P>*jsyo0PEN!g~w*dy2a2&p%&r`l&+Y z-oD-U(wAJ)yuIK%&_>22Z+92Q|4h2~K<@H%aSV~ToOvH0_*N6P|UwLW=Q+rCeuzteP)`i3wg-nauwmlLG-G&)E& zO8@!B=-R;+TG|}(80Z$&64!{5l*E!$tK_0oAjM#0U}&ss07PaXh6Yv!W>$uV+6IPJ k1_mFuJ=lYyAvZrIGp!Q0hKAP>K=lj^p00i_>zopr03)}$o&W#< literal 0 HcmV?d00001 diff --git a/recipes/icons/kurier_poranny.png b/recipes/icons/kurier_poranny.png new file mode 100644 index 0000000000000000000000000000000000000000..9c2742a1eb5b45b6a00a2fe081c17b4abaec0036 GIT binary patch literal 354 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPF=ES4z)+>ez|hdb!0-zw)bN6Vq11qZ;Z*_ygVhWM2JwP9y8>;15)uJE zA+G;{VE4@b9clmHz5Cx0`rlaTKLf-6@=&kl#UM3|N#5=*3>~bp9zc$-r;B3<$Mxid z24*)^RnpgtlOqgAJeZpp43tfEbUDv>B=LE-8%P`qQVd#}sp-IG?NMkjMKF&c zO~tS|Lx4GnRjv8r(g}=`3=F&Eg)Yxq?JWm1Shd78q9i4;B-JXpC>2OC7#SEE>ly%& yS%{&5m4TU+iLthUp_PGw-pT(TQ8eV{r(~v8;?@v9#T^(P3=E#GelF{r5}E*lZ)9Wu literal 0 HcmV?d00001 diff --git a/recipes/icons/kurier_szczecinski.png b/recipes/icons/kurier_szczecinski.png new file mode 100644 index 0000000000000000000000000000000000000000..06aadc5529bd8a7c45c53ddb0240ae70b05ed6a9 GIT binary patch literal 1175 zcmZ`%drXs86u-PWr(>8o=G@Q_1omJ}T52G6E&;;BMl4VYw6uJEetiIatWZ!!1%WXf zFJX&-aSq%Ns%#1d``DxfCXxa2kQK^kSIVx#WZ54(Io~}w_xyh6_1!b^ zj2m*Z@igs1OQiJd@MBz0zQ@q<(YyknYgt0 zlr){6A>=UVRFuPzh@Q)V4yQ}NLgVC+6t(NACv82d`3+IN1@4zW)sw=cZgJWZ34Gs14^K(|K zwXi^?%#uBOw!C4p)z{bYxk!#uVKC_X`}?_^G_7{fX0y%D&!19#f-sZ&zW>hYbn0|E zu?Rg|UB0leFf}zbIywrM|BdE~!{IQS%~=XLo0U3YHjRvoOiWB@G@ACdW}9teW@e_O z_;ehZbop|}($Z3AN5{&_%KG{`+=IpP$L#Fv@bKN<-fPRtOKWRuk01ZGx#_Um?M>=N zqtT$(>x~A3Ry$-endau^;8sNf4wubjBdKhJjxbVka}-aWO#kI@TxDmy#Yke% zC}Nb|-rn{~NWv}ndT7}c^7Ci;uegTNQeH@s*A4rh+p;l+omF$}R*u{GsrS9_b$Jq( z@?}m-%#K#0kFTNiCCZ9ZR<(He{!g#x7Hmr)#a%tnJpdECaF4}A3O>+33V@D-6d6u^ zQ^#Zf2rByW9j-+YxBdM5nz_@jg}IR46YrxRKI!4%5fy-XV=#e9*UE-F=IVaEu~9PQ zufz!6@En=>Qs1*yasSGq4_1PS-aldPa}?cfp04eA82ph6lk7HV`TXaeOHVJ1_uR}o z$isx}(Usn5`~95p`g%_lnUI5dHLF~+te(8(e0h&9ke=FwrWhI%4l?fW*tHjnbZ0+O#Jm%Y5uW!B3QGA}iX_%uHySF-8wOMpcF(s%C7Q`@r76cN z?f?dJb^nLC0sj7rli%jcYI?m6x)on-DhTu4?e6heN$IBqZr%Z~@B>*H8iu_G_YjaB zMa$-MvQZ*WfkFd#}V;|h45bDP46hOx7_4S6Fo+k-*%fF5lyD00 z32|lXzQxdU=l}oz%zc*`doD8c-(~DQ&Dnm1x$7cp&#nJ||FL(TW$U=f+i~Il|9_0# z*BH94{{Q!jqwB=GKmVC0-1+^8CF9M$9YE64fJ82rl7Rdn3W&i}A&TH+c} zl9E`GYL#4+3Zxi}3=EBR4S>ij#L&RX(9p`*LfgR5%D{j%GHo@AhTQy=%(P0}8cJC1 Ri32q-c)I$ztaD0e0sz|Yj&1+| literal 0 HcmV?d00001 diff --git a/recipes/icons/trojmiasto_pl.png b/recipes/icons/trojmiasto_pl.png new file mode 100644 index 0000000000000000000000000000000000000000..665a62b0b5d049a72b7e4004b0eede4e3c731506 GIT binary patch literal 537 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstUx|vage(c z!@6@aFM%9|WRD45bDP46hOx7_4S6Fo+k-*%fF5l-L#E z6XN>+|NkF9eqOln;^fI^Po8}E|Nq~uTc2}sKFP|yQ)~tIA7To;u<@<^i+uprYh7ML)47$EJWW&aRHpQ87_4 za7M!n3vK4rElZ|s=~**p4^zzCMUyslt(vu~P3?|~Zmz6ptZgl`^Nfy(8#`Cd+}XNR zbe8a%+zYa)-qXS_Ffe>v$Ue_+=^`VbZK@@%5hW>!C8<`)MX5lF!N|bSSl0lE%t8ze utPD-8jE%Gn46O_d+!;PCMA49&pOTqYiCcqNtd}fM1B0ilpUXO@geCy2o$QAI literal 0 HcmV?d00001 diff --git a/recipes/kurier_lubelski.recipe b/recipes/kurier_lubelski.recipe new file mode 100644 index 0000000000..4ae40b0eb5 --- /dev/null +++ b/recipes/kurier_lubelski.recipe @@ -0,0 +1,34 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class KurierLubelski(BasicNewsRecipe): + title = u'Kurier Lubelski' + __author__ = 'fenuks' + description = u'Gazeta Regionalna Kurier Lubelski. Najnowsze Wiadomości Lublin. Czytaj Informacje Lublin!' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/kurierlubelski.png?24' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + use_embedded_content = False + ignore_duplicate_articles = {'title', 'url'} + #preprocess_regexps = [(re.compile(ur'Czytaj także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur',Czytaj też:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Zobacz także:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'

', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TEŻ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ WIĘCEJ:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'CZYTAJ TAKŻE:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: ''), (re.compile(ur'Nasze serwisy:.*', re.DOTALL), lambda match: '') ] + remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}) + remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})] + + feeds = [(u'Wiadomo\u015bci', u'http://kurierlubelski.feedsportal.com/c/32980/f/533785/index.rss?201302'), (u'Region', u'http://www.kurierlubelski.pl/rss/kurierlubelski_region.xml?201302'), (u'Sport', u'http://kurierlubelski.feedsportal.com/c/32980/f/533786/index.rss?201302'), (u'Kultura', u'http://kurierlubelski.feedsportal.com/c/32980/f/533787/index.rss?201302'), (u'Rozmaito\u015bci', u'http://www.kurierlubelski.pl/rss/kurierlubelski_rozmaitosci.xml?201302'), (u'Dom', u'http://www.kurierlubelski.pl/rss/kurierlubelski_dom.xml?201302'), (u'Serwisy', u'http://www.kurierlubelski.pl/rss/kurierlubelski_serwisy.xml?201302'), (u'Motofakty', u'http://www.kurierlubelski.pl/rss/kurierlubelski_motofakty.xml?201302'), (u'M\xf3j Reporter', u'http://www.kurierlubelski.pl/rss/kurierlubelski_mojreporter.xml?201302'), (u'Praca', u'http://www.kurierlubelski.pl/rss/kurierlubelski_praca.xml?201302')] + + def print_version(self, url): + return url.replace('artykul', 'drukuj') + + def skip_ad_pages(self, soup): + if 'Advertisement' in soup.title: + nexturl=soup.find('a')['href'] + return self.index_to_soup(nexturl, raw=True) + + def get_cover_url(self): + soup = self.index_to_soup('http://www.prasa24.pl/gazeta/kurier-lubelski/') + self.cover_url=soup.find(id='pojemnik').img['src'] + return getattr(self, 'cover_url', self.cover_url) diff --git a/recipes/kurier_poranny.recipe b/recipes/kurier_poranny.recipe new file mode 100644 index 0000000000..f303eb3295 --- /dev/null +++ b/recipes/kurier_poranny.recipe @@ -0,0 +1,78 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class KurierPoranny(BasicNewsRecipe): + title = u'Kurier Poranny' + __author__ = 'fenuks' + description = u'Kurier Poranny | poranny.pl - portal miejski Białegostoku,informacje,wydarzenia' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.poranny.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + + feeds = [(u'Wszystkie', u'http://www.poranny.pl/rss.xml'), + (u'Białystok', u'http://www.poranny.pl/bialystok.xml'), + (u'Bielsk Podlaski', u'http://www.poranny.pl/bielskpodlaski.xml'), + (u'Czarna Białostocka', u'http://www.poranny.pl/czarnabialostocka.xml'), + (u'Hajnówka', u'http://www.poranny.pl/hajnowka.xml'), + (u'Łapy', u'http://www.poranny.pl/lapy.xml'), + (u'Sokółka', u'http://www.poranny.pl/sokolka.xml'), + (u'Supraśl', u'http://www.poranny.pl/suprasl.xml'), + (u'Wasilków', u'http://www.poranny.pl/wasilkow.xml'), + (u'Sport', u'http://www.poranny.pl/sport.xml'), + (u'Praca', u'http://www.poranny.pl/praca.xml'), + (u'Kultura', u'http://www.poranny.pl/kultura.xml'), + (u'Dom', u'http://www.poranny.pl/dom.xml'), + (u'Auto', u'http://www.poranny.pl/auto.xml'), + (u'Polityka', u'http://www.poranny.pl/polityka.xml')] + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/kurier_szczecinski.recipe b/recipes/kurier_szczecinski.recipe new file mode 100644 index 0000000000..b19e5ecd79 --- /dev/null +++ b/recipes/kurier_szczecinski.recipe @@ -0,0 +1,27 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class KurierSzczecinski(BasicNewsRecipe): + title = u'Kurier Szczeci\u0144ski' + __author__ = 'fenuks' + description = u'24Kurier jest portalem Kuriera Szczecińskiego. Zawiera aktualności ze Szczecina oraz wiadomości regionalne z województwa zachodniopomorskiego. ' + category = 'newspaper' + #publication_type = '' + language = 'pl' + #encoding = '' + #extra_css = '' + cover_url = 'http://www.24kurier.pl/Administracja/Img/24kurier_logo-copy-po-zapis' + #masthead_url = '' + use_embedded_content = False + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + remove_javascript = True + remove_attributes = ['style', 'font'] + ignore_duplicate_articles = {'title', 'url'} + + keep_only_tags = [dict(attrs={'class':'section'})] + remove_tags = [dict(attrs={'class':['Ikonki', 'rek', 'artComments']})] + remove_tags_after = dict(attrs={'class':'artComments'}) + #remove_tags_before = dict() + feeds = [(u'Aktualno\u015bci', u'http://www.24kurier.pl/cmspages/articles_rss.aspx'), (u'Kraj', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kraj'), (u'\u015awiat', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=swiat'), (u'Sport', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=sport'), (u'Kultura', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kultura'), (u'Gospodarka', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=gospodarka'), (u'Nauka', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=nauka'), (u'Region', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=region'), (u'Szczecin', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=szczecin'), (u'Bia\u0142ogard', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=bialogard'), (u'Choszczno', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=choszczno'), (u'Drawsko', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=drawsko'), (u'Goleni\xf3w', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=goleniow'), (u'Gryfice', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=gryfice'), (u'Gryfino', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=gryfino'), (u'Kamie\u0144 Pomorski', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kamien'), (u'Ko\u0142obrzeg', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kolobrzeg'), (u'Koszalin', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=koszalin'), (u'\u0141obez', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=lobez'), (u'My\u015blib\xf3rz', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=mysliborz'), (u'Police', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=police'), (u'Pyrzyce', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=pyrzyce'), (u'S\u0142awno', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=slawno'), (u'Stargard', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=stargard'), (u'Szczecinek', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=szczecinek'), (u'\u015awidwin', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=swidwin'), (u'\u015awinouj\u015bcie', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=swinoujscie'), (u'Wa\u0142cz', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=walcz')] diff --git a/recipes/nto.recipe b/recipes/nto.recipe new file mode 100644 index 0000000000..56725adec4 --- /dev/null +++ b/recipes/nto.recipe @@ -0,0 +1,63 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class NTO(BasicNewsRecipe): + title = u'Nowa Trybuna Opolska' + __author__ = 'fenuks' + description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.' + category = 'newspaper' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + INDEX = 'http://www.nto.pl' + masthead_url = INDEX + '/images/top_logo.png' + oldest_article = 7 + max_articles_per_feed = 100 + remove_empty_feeds = True + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] + + keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] + remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', + 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + dict(attrs={'class':'articleFunctions'})] + + feeds = [(u'Wszystkie', u'http://www.nto.pl/rss.xml'), (u'Region', u'http://www.nto.pl/region.xml'), (u'Brzeg', u'http://www.nto.pl/brzeg.xml'), (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'), (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'), (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'), (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'), (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'), (u'Nysa', u'http://www.nto.pl/nysa.xml'), (u'Olesno', u'http://www.nto.pl/olesno.xml'), (u'Opole', u'http://www.nto.pl/opole.xml'), (u'Prudnik', u'http://www.nto.pl/prudnik.xml'), (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'), (u'Sport', u'http://www.nto.pl/sport.xml'), (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'), (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'), (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'), (u'Studia', u'http://www.nto.pl/akademicka.xml')] + + def get_cover_url(self): + soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') + nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] + soup = self.index_to_soup(nexturl) + self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] + return getattr(self, 'cover_url', self.cover_url) + + def append_page(self, soup, appendtag): + tag = soup.find('span', attrs={'class':'photoNavigationPages'}) + if tag: + number = int(tag.string.rpartition('/')[-1].replace(' ', '')) + baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + + for r in appendtag.findAll(attrs={'class':'photoNavigation'}): + r.extract() + for nr in range(2, number+1): + soup2 = self.index_to_soup(baseurl + str(nr)) + pagetext = soup2.find(id='photoContainer') + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoMeta'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + pagetext = soup2.find(attrs={'class':'photoStoryText'}) + if pagetext: + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + return soup diff --git a/recipes/trojmiasto_pl.recipe b/recipes/trojmiasto_pl.recipe new file mode 100644 index 0000000000..8ac35c2edb --- /dev/null +++ b/recipes/trojmiasto_pl.recipe @@ -0,0 +1,37 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class Trojmiasto(BasicNewsRecipe): + title = u'Tr\xf3jmiasto.pl' + __author__ = 'fenuks' + description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl' + category = '' + #publication_type = '' + language = 'pl' + encoding = 'iso-8859-2' + extra_css = 'ul {list-style: none; padding:0; margin:0;}' + cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif' + #masthead_url = '' + use_embedded_content = False + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + remove_empty_feeds = True + remove_javascript = True + remove_attributes = ['style', 'font'] + ignore_duplicate_articles = {'title', 'url'} + + preprocess_regexps = [(re.compile(ur'Czytaj więcej.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur'[A-ZĄĆĘŁŃÓŚŹŻ \-,.:]*?', re.DOTALL), lambda match: ''),] + + #keep_only_tags = [] + remove_tags = [dict(id=['logo', 'font_small', 'font_big']), dict(attrs={'class':['title-long', 'ankieta', 'newsletter-inside-content newsletter-wrap', 'copyright_box', + 'logo', 'btn btn-photo-add', 'related-info-wrap', 'nTabs', 'article-list', 'rate-player horizontal', 'type-box', 'rate-player' + 'hover-nav', 'live-head tC', 'prev-link', 'next-link', 'ie6']}), dict(attrs={'title':[u'drukuj artykuł', u'podziel się na Facebooku', u'prześlij artykuł']})] + remove_tags_after = dict(attrs={'class':'author-wrap'}) + remove_tags_before = dict(attrs={'class':'text-container'}) + + feeds = [(u'Wszystkie', u'http://rss.trojmiasto.pl/rss,0.xml'), (u'Fakty i opinie', u'http://rss.trojmiasto.pl/rss,1.xml'), (u'Sport', u'http://rss.trojmiasto.pl/rss,2.xml'), (u'Dom', u'http://rss.trojmiasto.pl/rss,3.xml'), (u'Moto', u'http://rss.trojmiasto.pl/rss,4.xml'), (u'Nauka', u'http://rss.trojmiasto.pl/rss,5.xml'), (u'Rozrywka', u'http://rss.trojmiasto.pl/rss,6.xml'), (u'Kultura', u'http://rss.trojmiasto.pl/rss,7.xml'), (u'Rowery', u'http://rss.trojmiasto.pl/rss,8.xml'), (u'Dziecko', u'http://rss.trojmiasto.pl/rss,9.xml'), (u'Zdrowie i uroda', u'http://rss.trojmiasto.pl/rss,10.xml'), (u'Praca', u'http://rss.trojmiasto.pl/rss,11.xml'), (u'Artyku\u0142y czytelnik\xf3w', u'http://rss.trojmiasto.pl/rss,12.xml'), (u'Korki', u'http://rss.trojmiasto.pl/rss,13.xml'), (u'Historia', u'http://rss.trojmiasto.pl/rss,14.xml'), (u'Biznes', u'http://rss.trojmiasto.pl/rss,16.xml'), (u'Kryminalne Tr\xf3jmiasto', u'http://rss.trojmiasto.pl/rss,17.xml'), (u'Przewodnik', u'http://rss.trojmiasto.pl/rss,18.xml'), (u'Aktywne Tr\xf3jmiasto', u'http://rss.trojmiasto.pl/rss,19.xml'), (u'Delux', u'http://rss.trojmiasto.pl/rss,20.xml')] + + def print_version(self, url): + return url + '?print=1'