diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe
new file mode 100644
index 0000000000..d5b4997aa7
--- /dev/null
+++ b/recipes/benchmark_pl.recipe
@@ -0,0 +1,97 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Benchmark_pl(BasicNewsRecipe):
+    """Recipe for benchmark.pl that merges multi-page articles and galleries."""
+    title = u'Benchmark.pl'
+    __author__ = 'fenuks'
+    description = u'benchmark.pl -IT site'
+    cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
+    category = 'IT'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    no_stylesheets=True
+    # With DOTALL the pattern removes everything from "Więcej o " to the end of the page.
+    preprocess_regexps = [(re.compile(ur'\bWięcej o .*', re.DOTALL|re.IGNORECASE), lambda match: '')]
+    keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']})]
+    remove_tags_after=dict(name='div', attrs={'class':'body'})
+    remove_tags=[dict(name='div', attrs={'class':['kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane', 'categoryNextToSocializeGallery']})]
+    INDEX= 'http://www.benchmark.pl'
+    feeds = [(u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
+             (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml')]
+
+
+    def _extract_first(self, tag, *args, **kwargs):
+        """Detach the first match of tag.find(*args, **kwargs); do nothing without a match."""
+        found = tag.find(*args, **kwargs)
+        if found is not None:
+            found.extract()
+
+
+    def _preview_to_img(self, tag):
+        """Rename the first element with class preview in tag to img and set its src."""
+        # [16:] skips a fixed-length prefix of the inline style (presumably
+        # "background:url('" - TODO confirm); the image path ends at "')".
+        image=tag.find('div', attrs={'class':'preview'}).div['style'][16:]
+        image=self.INDEX + image[:image.find("')")]
+        preview=tag.find(attrs={'class':'preview'})
+        preview.name='img'
+        preview['src']=image
+
+
+    def append_page(self, soup, appendtag):
+        """Append div.body of every following article page to appendtag.
+
+        Follows the parent link of span.next until a page has no such span.
+        div.k_ster is removed from appendtag whenever it is present.
+        """
+        nexturl = soup.find('span', attrs={'class':'next'})
+        while nexturl is not None:
+            nexturl= self.INDEX + nexturl.parent['href']
+            soup2 = self.index_to_soup(nexturl)
+            nexturl=soup2.find('span', attrs={'class':'next'})
+            pagetext = soup2.find(name='div', attrs={'class':'body'})
+            # k_ster may be missing, so it is extracted only when found.
+            self._extract_first(appendtag, 'div', attrs={'class':'k_ster'})
+            if pagetext is not None:
+                pos = len(appendtag.contents)
+                appendtag.insert(pos, pagetext)
+        self._extract_first(appendtag, 'div', attrs={'class':'k_ster'})
+
+
+    def image_article(self, soup, appendtag):
+        """Append every following gallery page to appendtag as an inline image.
+
+        Follows a.move_next links; on each page div.preview becomes an img and
+        div.title, div.thumb, div.panelOcenaObserwowane and the move links are removed.
+        """
+        nexturl=soup.find('div', attrs={'class':'preview'})
+        if nexturl is None:
+            return
+        nexturl=nexturl.find('a', attrs={'class':'move_next'})
+        self._preview_to_img(appendtag)
+        # move_next may be absent (nexturl is None), so extract it only when found.
+        self._extract_first(appendtag, 'a', attrs={'class':'move_next'})
+        while nexturl is not None:
+            nexturl= self.INDEX + nexturl['href']
+            soup2 = self.index_to_soup(nexturl)
+            nexturl=soup2.find('a', attrs={'class':'move_next'})
+            self._preview_to_img(soup2)
+            pagetext=soup2.find('div', attrs={'class':'gallery'})
+            if pagetext is None:
+                continue
+            for css_class in ('title', 'thumb', 'panelOcenaObserwowane'):
+                self._extract_first(pagetext, 'div', attrs={'class':css_class})
+            self._extract_first(pagetext, 'a', attrs={'class':'move_next'})
+            self._extract_first(pagetext, 'a', attrs={'class':'move_back'})
+            pos = len(appendtag.contents)
+            appendtag.insert(pos, pagetext)
+
+
+    def preprocess_html(self, soup):
+        """Merge follow-up pages into soup: galleries if div.preview exists, else articles."""
+        if soup.find('div', attrs={'class':'preview'}) is not None:
+            self.image_article(soup, soup.body)
+        else:
+            self.append_page(soup, soup.body)
+        return soup
diff --git a/recipes/cgm_pl.recipe b/recipes/cgm_pl.recipe
new file mode 100644
index 0000000000..eba856ac3a
--- /dev/null
+++ b/recipes/cgm_pl.recipe
@@ -0,0 +1,54 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class CGM(BasicNewsRecipe):
+    """Recipe for cgm.pl (Codzienna Gazeta Muzyczna)."""
+    title = u'CGM'
+    oldest_article = 7
+    __author__ = 'fenuks'
+    description = u'Codzienna Gazeta Muzyczna'
+    cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
+    category = 'music'
+    language = 'pl'
+    use_embedded_content = False
+    max_articles_per_feed = 100
+    no_stylesheets=True
+    extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;}'
+    remove_tags_before=dict(id='mainContent')
+    remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
+    remove_tags=[dict(name='div', attrs={'class':'fbContainer'}),
+                 dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
+                 dict(id=['movieShare', 'container'])]
+    feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
+             (u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
+    # Substrings of img src values; preprocess_html removes every matching image.
+    UNWANTED_IMAGES = (
+        '_vault/_article_photos/5807.jpg', 'article_photos/5841.jpg',
+        'article_photos/5825.jpg', '_article_photos/5920.jpg',
+        '_article_photos/5919.jpg', '_article_photos/5918.jpg',
+        '_article_photos/5914.jpg', '_article_photos/5911.jpg',
+        '_article_photos/5923.jpg', '_article_photos/5921.jpg',
+    )
+
+
+    def preprocess_html(self, soup):
+        """Strip the UNWANTED_IMAGES and replace the Flash gallery with an img.
+
+        The img src is built from the embed src found inside div.galleryFlash.
+        """
+        for image_tag in soup.findAll('img'):
+            # An img without a src attribute cannot match and must not raise KeyError.
+            src = image_tag.get('src', '')
+            if any(unwanted in src for unwanted in self.UNWANTED_IMAGES):
+                image_tag.extract()
+        gallery=soup.find('div', attrs={'class':'galleryFlash'})
+        if gallery:
+            img=gallery.find('embed')
+            if img:
+                # [35:] drops the first 35 characters (presumably a fixed URL prefix - TODO confirm).
+                img=img['src'][35:]
+                img='http://www.cgm.pl/_vault/_gallery/_photo/'+img
+                for param in gallery.findAll(name='param'):
+                    param.extract()
+                gallery.contents[1].name='img'
+                gallery.contents[1]['src']=img
+        return soup
\ No newline at end of file
diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe
new file mode 100644
index 0000000000..d80161e71a
--- /dev/null
+++ b/recipes/dzieje_pl.recipe
@@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Dzieje(BasicNewsRecipe):
+    """Recipe for the dzieje.pl RSS feed."""
+    title = u'dzieje.pl'
+    __author__ = 'fenuks'
+    description = 'Dzieje - history of Poland'
+    cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
+    category = 'history'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    remove_javascript=True
+    no_stylesheets= True
+    remove_tags_before= dict(name='h1', attrs={'class':'title'})
+    remove_tags_after= dict(id='dogory')
+    remove_tags=[dict(id='dogory')]
+    feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
diff --git a/recipes/greenlinux_pl.recipe b/recipes/greenlinux_pl.recipe
new file mode 100644
index 0000000000..3c5a3c8f20
--- /dev/null
+++ b/recipes/greenlinux_pl.recipe
@@ -0,0 +1,14 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class GreenLinux(BasicNewsRecipe):
+    """Recipe for the GreenLinux.pl feed; relies on auto_cleanup."""
+    title = u'GreenLinux.pl'
+    __author__ = 'fenuks'
+    category = 'IT'
+    language = 'pl'
+    cover_url = 'http://lh5.ggpht.com/_xd_6Y9kXhEc/S8tjyqlfhfI/AAAAAAAAAYU/zFNTp07ZQko/top.png'
+    oldest_article = 15
+    max_articles_per_feed = 100
+    auto_cleanup = True
+
+    feeds = [(u'Newsy', u'http://feeds.feedburner.com/greenlinux')]
diff --git a/recipes/historia_pl.recipe b/recipes/historia_pl.recipe
new file mode 100644
index 0000000000..68321df351
--- /dev/null
+++ b/recipes/historia_pl.recipe
@@ -0,0 +1,14 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Historia_org_pl(BasicNewsRecipe):
+    """Recipe for the historia.org.pl RSS feed."""
+    title = u'Historia.org.pl'
+    __author__ = 'fenuks'
+    description = u'history site'
+    cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
+    category = 'history'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+
+    feeds = [(u'Artykuły', u'http://www.historia.org.pl/index.php?format=feed&type=rss')]
diff --git a/recipes/icons/benchmark_pl.png b/recipes/icons/benchmark_pl.png
new file mode 100644
index 0000000000..a1519a3fd7
Binary files /dev/null and b/recipes/icons/benchmark_pl.png differ
diff --git a/recipes/icons/cgm_pl.png b/recipes/icons/cgm_pl.png
new file mode 100644
index 0000000000..0e332f720e
Binary files /dev/null and b/recipes/icons/cgm_pl.png differ
diff --git a/recipes/icons/dzieje_pl.png b/recipes/icons/dzieje_pl.png
new file mode 100644
index 0000000000..c3033c62f9
Binary files /dev/null and b/recipes/icons/dzieje_pl.png differ
diff --git a/recipes/icons/greenlinux_pl.png b/recipes/icons/greenlinux_pl.png
new file mode 100644
index 0000000000..475ac04053
Binary files /dev/null and b/recipes/icons/greenlinux_pl.png differ
diff --git a/recipes/icons/historia_pl.png b/recipes/icons/historia_pl.png
new file mode 100644
index 0000000000..f7774e3139
Binary files /dev/null and b/recipes/icons/historia_pl.png differ
diff --git a/recipes/icons/lomza.png b/recipes/icons/lomza.png
new file mode 100644
index 0000000000..d7ee0a82ef
Binary files /dev/null and b/recipes/icons/lomza.png differ
diff --git a/recipes/ksiazka_pl.recipe b/recipes/ksiazka_pl.recipe
new file mode 100644
index 0000000000..7f9999f782
--- /dev/null
+++ b/recipes/ksiazka_pl.recipe
@@ -0,0 +1,31 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+class Ksiazka_net_pl(BasicNewsRecipe):
+    """Recipe for the ksiazka.net.pl feeds."""
+    title = u'ksiazka.net.pl'
+    __author__ = 'fenuks'
+    description = u'Ksiazka.net.pl - book vortal'
+    cover_url = 'http://www.ksiazka.net.pl/fileadmin/templates/ksiazka.net.pl/images/1PortalKsiegarski-logo.jpg'
+    category = 'books'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    no_stylesheets= True
+    #extra_css = 'img {float: right;}'
+    preprocess_regexps = [(re.compile(ur'Podoba mi się, kupuję:'), lambda match: ' ')]
+    remove_tags_before= dict(name='div', attrs={'class':'m-body'})
+    remove_tags_after= dict(name='div', attrs={'class':'m-body-link'})
+    remove_tags=[dict(attrs={'class':['mk_library-icon', 'm-body-link', 'tagi']})]
+    feeds = [(u'Wiadomości', u'http://www.ksiazka.net.pl/?id=wiadomosci&type=100'),
+             (u'Książki', u'http://www.ksiazka.net.pl/?id=ksiazki&type=100'),
+             (u'Rynek', u'http://www.ksiazka.net.pl/?id=rynek&type=100')]
+
+    def image_url_processor(self, baseurl, url):
+        """Rewrite file:// and scheme-less image URLs to point at www.ksiazka.net.pl."""
+        if (('file://' in url) and ('www.ksiazka.net.pl/' not in url)):
+            # [8:] assumes the URL starts with the 8-character 'file:///' - TODO confirm.
+            return 'http://www.ksiazka.net.pl/' + url[8:]
+        elif 'http://' not in url and 'https://' not in url:
+            return 'http://www.ksiazka.net.pl/' + url
+        else:
+            return url
diff --git a/recipes/lomza.recipe b/recipes/lomza.recipe
new file mode 100644
index 0000000000..899745a84a
--- /dev/null
+++ b/recipes/lomza.recipe
@@ -0,0 +1,15 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Lomza(BasicNewsRecipe):
+    """Recipe for the 4lomza.pl feed."""
+    title = u'4Lomza'
+    __author__ = 'fenuks'
+    description = u'4Łomża - regional site'
+    cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
+    language = 'pl'
+    oldest_article = 15
+    no_stylesheets=True
+    max_articles_per_feed = 100
+    remove_tags=[dict(name='div', attrs={'class':['bxbanner', 'drukuj', 'wyslijznajomemu']})]
+    keep_only_tags=[dict(name='div', attrs={'class':'wiadomosc'})]
+    feeds = [(u'Łomża', u'http://feeds.feedburner.com/4lomza.pl')]
diff --git a/recipes/tablety_pl.recipe b/recipes/tablety_pl.recipe
new file mode 100644
index 0000000000..08212fbc66
--- /dev/null
+++ b/recipes/tablety_pl.recipe
@@ -0,0 +1,13 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Tablety_pl(BasicNewsRecipe):
+    """Recipe for the tablety.pl feed."""
+    title = u'Tablety.pl'
+    __author__ = 'fenuks'
+    description = u'tablety.pl - latest tablet news'
+    cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
+    category = 'IT'
+    language = 'pl'
+    oldest_article = 8
+    max_articles_per_feed = 100
+    feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]