From b50a6cb9c807eed63f31ca07d59573b20599989b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 11 Nov 2012 13:08:23 +0100 Subject: [PATCH] one layout for gazeta.pl flavours --- recipes/gazeta_pl_krakow.recipe | 103 +++++++++++++++++++++++++++ recipes/gazeta_pl_warszawa.recipe | 100 ++++++++++++++++++++++++++ recipes/gazeta_wyborcza.recipe | 2 +- recipes/icons/gazeta_pl_krakow.png | Bin 0 -> 802 bytes recipes/icons/gazeta_pl_szczecin.png | Bin 0 -> 802 bytes recipes/icons/gazeta_pl_warszawa.png | Bin 0 -> 802 bytes recipes/icons/gazeta_wyborcza.png | Bin 221 -> 802 bytes 7 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 recipes/gazeta_pl_krakow.recipe create mode 100644 recipes/gazeta_pl_warszawa.recipe create mode 100644 recipes/icons/gazeta_pl_krakow.png create mode 100644 recipes/icons/gazeta_pl_szczecin.png create mode 100644 recipes/icons/gazeta_pl_warszawa.png diff --git a/recipes/gazeta_pl_krakow.recipe b/recipes/gazeta_pl_krakow.recipe new file mode 100644 index 0000000000..0f35e536f6 --- /dev/null +++ b/recipes/gazeta_pl_krakow.recipe @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__copyright__ = 'teepel based on GW from fenuks' + +''' +krakow.gazeta.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class gw_krakow(BasicNewsRecipe): + title = u'Gazeta.pl Kraków' + __author__ = 'teepel based on GW from fenuks' + language = 'pl' + description =u'Wiadomości z Krakowa na portalu Gazeta.pl.' + category='newspaper' + publication_type = 'newspaper' + masthead_url='http://bi.gazeta.pl/im/5/8528/m8528105.gif' + INDEX='http://krakow.gazeta.pl/' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_likes'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tools'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'rel'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})) + remove_tags.append(dict(name = 'u1', attrs = {'id' : 'articleToolbar'})) + remove_tags.append(dict(name = 'li', attrs = {'class' : 'atComments'})) + remove_tags.append(dict(name = 'li', attrs = {'class' : 'atLicense'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'banP4'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'article_toolbar'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tags'})) + remove_tags.append(dict(name = 'p', attrs = {'class' : 'txt_upl'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_buttons'})) + + remove_tags_after = [dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})] + + feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/krakow.xml')] + + def skip_ad_pages(self, soup): + tag=soup.find(name='a', attrs={'class':'btn'}) + if tag: + new_soup=self.index_to_soup(tag['href'], raw=True) + return new_soup + + + def append_page(self, soup, appendtag): + loop=False + tag = soup.find('div', attrs={'id':'Str'}) + if appendtag.find('div', attrs={'id':'Str'}): + nexturl=tag.findAll('a') + appendtag.find('div', attrs={'id':'Str'}).extract() + loop=True + if appendtag.find(id='source'): + appendtag.find(id='source').extract() + while loop: + loop=False + for link in nexturl: + if u'następne' in link.string: + url= self.INDEX + link['href'] + soup2 = self.index_to_soup(url) + pagetext = soup2.find(id='artykul') + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + tag = soup2.find('div', attrs={'id':'Str'}) + nexturl=tag.findAll('a') + loop=True + + def gallery_article(self, appendtag): + tag=appendtag.find(id='container_gal') + if tag: + nexturl=appendtag.find(id='gal_btn_next').a['href'] + appendtag.find(id='gal_navi').extract() + while nexturl: + soup2=self.index_to_soup(nexturl) + pagetext=soup2.find(id='container_gal') + nexturl=pagetext.find(id='gal_btn_next') + if nexturl: + nexturl=nexturl.a['href'] + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + rem=appendtag.find(id='gal_navi') + if rem: + rem.extract() + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + if soup.find(id='container_gal'): + self.gallery_article(soup.body) + return soup + diff --git a/recipes/gazeta_pl_warszawa.recipe b/recipes/gazeta_pl_warszawa.recipe new file mode 100644 index 0000000000..7a43931db4 --- /dev/null +++ b/recipes/gazeta_pl_warszawa.recipe @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = 'teepel based on GW from fenuks' + +''' +warszawa.gazeta.pl +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class gw_wawa(BasicNewsRecipe): + title = u'Gazeta.pl Warszawa' + __author__ = 'teepel based on GW from fenuks' + language = 'pl' + description ='Wiadomości z Warszawy na portalu Gazeta.pl.' + category='newspaper' + publication_type = 'newspaper' + masthead_url='http://bi.gazeta.pl/im/3/4089/m4089863.gif' + INDEX='http://warszawa.gazeta.pl/' + remove_empty_feeds= True + oldest_article = 1 + max_articles_per_feed = 100 + remove_javascript=True + no_stylesheets=True + + keep_only_tags =[] + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article'})) + + remove_tags =[] + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_likes'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tools'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'rel'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_share'})) + remove_tags.append(dict(name = 'u1', attrs = {'id' : 'articleToolbar'})) + remove_tags.append(dict(name = 'li', attrs = {'class' : 'atComments'})) + remove_tags.append(dict(name = 'li', attrs = {'class' : 'atLicense'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'banP4'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'article_toolbar'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_tags'})) + remove_tags.append(dict(name = 'p', attrs = {'class' : 'txt_upl'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazeta_article_related_new'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'gazetaVideoPlayer'})) + remove_tags.append(dict(name = 'div', attrs = {'id' : 'gazeta_article_miniatures'})) + + feeds = [(u'Wiadomości', u'http://rss.gazeta.pl/pub/rss/warszawa.xml')] + + def skip_ad_pages(self, soup): + tag=soup.find(name='a', attrs={'class':'btn'}) + if tag: + new_soup=self.index_to_soup(tag['href'], raw=True) + return new_soup + + + def append_page(self, soup, appendtag): + loop=False + tag = soup.find('div', attrs={'id':'Str'}) + if appendtag.find('div', attrs={'id':'Str'}): + nexturl=tag.findAll('a') + appendtag.find('div', attrs={'id':'Str'}).extract() + loop=True + if appendtag.find(id='source'): + appendtag.find(id='source').extract() + while loop: + loop=False + for link in nexturl: + if u'następne' in link.string: + url= self.INDEX + link['href'] + soup2 = self.index_to_soup(url) + pagetext = soup2.find(id='artykul') + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + tag = soup2.find('div', attrs={'id':'Str'}) + nexturl=tag.findAll('a') + loop=True + + def gallery_article(self, appendtag): + tag=appendtag.find(id='container_gal') + if tag: + nexturl=appendtag.find(id='gal_btn_next').a['href'] + appendtag.find(id='gal_navi').extract() + while nexturl: + soup2=self.index_to_soup(nexturl) + pagetext=soup2.find(id='container_gal') + nexturl=pagetext.find(id='gal_btn_next') + if nexturl: + nexturl=nexturl.a['href'] + pos = len(appendtag.contents) + appendtag.insert(pos, pagetext) + rem=appendtag.find(id='gal_navi') + if rem: + rem.extract() + + def preprocess_html(self, soup): + self.append_page(soup, soup.body) + if soup.find(id='container_gal'): + self.gallery_article(soup.body) + return soup + diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe index 3d416e444f..633b80444a 100644 --- a/recipes/gazeta_wyborcza.recipe +++ b/recipes/gazeta_wyborcza.recipe @@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class Gazeta_Wyborcza(BasicNewsRecipe): - title = u'Gazeta Wyborcza' + title = u'Gazeta.pl' __author__ = 'fenuks, Artur Stachecki' language = 'pl' description = 'news from gazeta.pl' diff --git a/recipes/icons/gazeta_pl_krakow.png b/recipes/icons/gazeta_pl_krakow.png new file mode 100644 index 0000000000000000000000000000000000000000..119afbba3a21235994b651d96e0ea8cc5fd35658 GIT binary patch literal 802 zcmV+-1Ks?IP)*cR1hS_Z?3}#LiYB z72pr-06^k|=$m0=t%<$9Nbd7@@%KFjfId5kom(U*?4j$q{V08|NOa~r%`Z-3rmnY# zez=GWo48Qpw5E0Z6adNOHIk_rT)n}@u~Wpumr!9xc7OFgrcy(;Ix4S)A`x(*h$1Y?o44ZQp|P#&rSjl?{>!8ufaFDMWG z;~|Mbnjx||A0!t_m{JAv*CN@E-X?fz5OZaQWMT>l0`!l+l6vWBa$mlOy}3+$W)!Kf zV{g^S4jo3tBjY&bA?bkD$?pJ&&rV=lA$FyKdh&6ipD!bgb>iWRh)EFT<614kZ+}9$ z>MFLnij5LFhe|WVmaBM~6iz3Iap3!)4Q6^CL9k)mW{~~(2(DI#D53kacL2y7J%rR6 zgqKQ0S7*o`c?EA;jOnYrCDTcV@pT{cs-ARuiBn^tR%@)1zTPqJ9L;}{~-F8 zam?I2h$0y+;ScPj`>Rhugy`#G?7}jtzmLoZucP8~lZZ7)E=_!Tto`%9m9a~Au=N&d z&rWo)jBJFM@;u4xO(d5k8W|_K{5wkJdq}R9(KnXx_wE8f_4Z&Zw~%HNy?h6+FOQea z;BW6o7Ha6}zw!F=_)qKr7i$SmUyx|ziimR}BH|iNSsywfwS(`=-B&*mT`Wtd%;JiK g-;au|)kQ?)U%3USo4=32mjD0&07*qoM6N<$g3INC1ONa4 literal 0 HcmV?d00001 diff --git a/recipes/icons/gazeta_pl_szczecin.png b/recipes/icons/gazeta_pl_szczecin.png new file mode 100644 index 0000000000000000000000000000000000000000..119afbba3a21235994b651d96e0ea8cc5fd35658 GIT binary patch literal 802 zcmV+-1Ks?IP)*cR1hS_Z?3}#LiYB z72pr-06^k|=$m0=t%<$9Nbd7@@%KFjfId5kom(U*?4j$q{V08|NOa~r%`Z-3rmnY# zez=GWo48Qpw5E0Z6adNOHIk_rT)n}@u~Wpumr!9xc7OFgrcy(;Ix4S)A`x(*h$1Y?o44ZQp|P#&rSjl?{>!8ufaFDMWG z;~|Mbnjx||A0!t_m{JAv*CN@E-X?fz5OZaQWMT>l0`!l+l6vWBa$mlOy}3+$W)!Kf zV{g^S4jo3tBjY&bA?bkD$?pJ&&rV=lA$FyKdh&6ipD!bgb>iWRh)EFT<614kZ+}9$ z>MFLnij5LFhe|WVmaBM~6iz3Iap3!)4Q6^CL9k)mW{~~(2(DI#D53kacL2y7J%rR6 zgqKQ0S7*o`c?EA;jOnYrCDTcV@pT{cs-ARuiBn^tR%@)1zTPqJ9L;}{~-F8 zam?I2h$0y+;ScPj`>Rhugy`#G?7}jtzmLoZucP8~lZZ7)E=_!Tto`%9m9a~Au=N&d z&rWo)jBJFM@;u4xO(d5k8W|_K{5wkJdq}R9(KnXx_wE8f_4Z&Zw~%HNy?h6+FOQea z;BW6o7Ha6}zw!F=_)qKr7i$SmUyx|ziimR}BH|iNSsywfwS(`=-B&*mT`Wtd%;JiK g-;au|)kQ?)U%3USo4=32mjD0&07*qoM6N<$g3INC1ONa4 literal 0 HcmV?d00001 diff --git a/recipes/icons/gazeta_pl_warszawa.png b/recipes/icons/gazeta_pl_warszawa.png new file mode 100644 index 0000000000000000000000000000000000000000..119afbba3a21235994b651d96e0ea8cc5fd35658 GIT binary patch literal 802 zcmV+-1Ks?IP)*cR1hS_Z?3}#LiYB z72pr-06^k|=$m0=t%<$9Nbd7@@%KFjfId5kom(U*?4j$q{V08|NOa~r%`Z-3rmnY# zez=GWo48Qpw5E0Z6adNOHIk_rT)n}@u~Wpumr!9xc7OFgrcy(;Ix4S)A`x(*h$1Y?o44ZQp|P#&rSjl?{>!8ufaFDMWG z;~|Mbnjx||A0!t_m{JAv*CN@E-X?fz5OZaQWMT>l0`!l+l6vWBa$mlOy}3+$W)!Kf zV{g^S4jo3tBjY&bA?bkD$?pJ&&rV=lA$FyKdh&6ipD!bgb>iWRh)EFT<614kZ+}9$ z>MFLnij5LFhe|WVmaBM~6iz3Iap3!)4Q6^CL9k)mW{~~(2(DI#D53kacL2y7J%rR6 zgqKQ0S7*o`c?EA;jOnYrCDTcV@pT{cs-ARuiBn^tR%@)1zTPqJ9L;}{~-F8 zam?I2h$0y+;ScPj`>Rhugy`#G?7}jtzmLoZucP8~lZZ7)E=_!Tto`%9m9a~Au=N&d z&rWo)jBJFM@;u4xO(d5k8W|_K{5wkJdq}R9(KnXx_wE8f_4Z&Zw~%HNy?h6+FOQea z;BW6o7Ha6}zw!F=_)qKr7i$SmUyx|ziimR}BH|iNSsywfwS(`=-B&*mT`Wtd%;JiK g-;au|)kQ?)U%3USo4=32mjD0&07*qoM6N<$g3INC1ONa4 literal 0 HcmV?d00001 diff --git a/recipes/icons/gazeta_wyborcza.png b/recipes/icons/gazeta_wyborcza.png index 9e480cc41dedd7617a54c7c24c1e118d888b3d87..119afbba3a21235994b651d96e0ea8cc5fd35658 100644 GIT binary patch delta 791 zcmV+y1L*wS0ip(w7=H)`0000V^Z#K000QYrL_t(Ijct-oXk1kk#((d=m-%O!41_Ey zZLOvabum!sLaBm^pi~zkinLTv!G$0g5kzrcy6M72w_4mPrAAaz+N7m5X*y0&)6Arr zHkl@-$@I@KO(yf+yYJq6Tug>o@GQ@FIN#y-9Zy8W&Q>86;C~P706^k|=$m0=t%<$9 zNbd7@@%KFjfId5kom(U*?4j$q{V08|NOa~r%`Z-3rmnY#ez=GWo48Qpw5E0Z6adNO zHIk_rT)n}@u~Wpumr!9xc7OFgrcycDfE0wgh95wGF)dHc%d_1C7KyyumqCe=jHx{^KEuL7E}5IUgh!N|;gw z^VcHTkKQJDY7lc}hGb$22?F$wzmj_CX>wn_hrPK>e1B#Xsjp*i)yNJVM#UrJIOQSf zfY!WHwz`Uq5;})UGsKpw zc$pMVCx~(2`=AYGdLBWrVccer{rCv3R);8|`?Gfd$Q(U{)Eb1BN<>#@$R2qGZ(EMc z(L;DB!hiEcf?bc2`|z#J6grSRZ;Rpm2k5a9!M@!Dg@G-&+91sqUT-(}YBPl}Ms)IU zjDODIt+z0xS!8WvOGpKHJ&%B|+Mar>B*|0-TV5bLbeLfOAo`ba%-lSPA{i~=5A3A- zt4~3M=<8wZ!ZNDAkIV&7I;J!18EO1b~~AE2W0Shx;Tb#Tu+|x|DXdG6T|H9f`#Y4-JSuIQ7v(e zC`m~yNwrEYN(E93Mh1q4x`qb221X$U7FNacptHiD0 Tsn!Q4paup{S3j3^P6