mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Various Polish news sources by fenuks
This commit is contained in:
parent
2f82d4a8fc
commit
02f8f08b65
70
recipes/benchmark_pl.recipe
Normal file
70
recipes/benchmark_pl.recipe
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
class Benchmark_pl(BasicNewsRecipe):
    """Calibre recipe for benchmark.pl (Polish IT news and reviews).

    Multi-page articles and image galleries are stitched into a single
    document in ``preprocess_html``: follow-up pages are downloaded with
    ``index_to_soup`` and their content is appended to the first page.
    """

    title = u'Benchmark.pl'
    __author__ = 'fenuks'
    description = u'benchmark.pl -IT site'
    cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
    category = 'IT'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True

    # Drop everything from the "Więcej o ..." ("More about ...") footer to the
    # end of the body.  Written as u'\\b...' instead of ur'\b...' because the
    # ``ur`` prefix is a syntax error on Python 3; the pattern is identical.
    preprocess_regexps = [
        (re.compile(u'\\bWięcej o .*</body>', re.DOTALL | re.IGNORECASE),
         lambda match: '</body>'),
    ]
    keep_only_tags = [dict(name='div', attrs={'class': ['m_zwykly', 'gallery']})]
    remove_tags_after = dict(name='div', attrs={'class': 'body'})
    remove_tags = [dict(name='div', attrs={'class': [
        'kategoria', 'socialize', 'thumb', 'panelOcenaObserwowane',
        'categoryNextToSocializeGallery']})]

    # Site root; hrefs and image paths found in the pages are appended to it.
    INDEX = 'http://www.benchmark.pl'

    feeds = [
        (u'Aktualności', u'http://www.benchmark.pl/rss/aktualnosci-pliki.xml'),
        (u'Testy i recenzje', u'http://www.benchmark.pl/rss/testy-recenzje-minirecenzje.xml'),
    ]

    @staticmethod
    def _drop_first(container, name, css_class):
        """Detach the first ``name`` tag with class ``css_class``, if present."""
        if container is None:
            return
        found = container.find(name, attrs={'class': css_class})
        if found is not None:
            found.extract()

    def _preview_to_img(self, container):
        """Turn the first ``div.preview`` inside ``container`` into an ``<img>``.

        The image path is read from the ``style`` attribute of the preview's
        first inner ``div``.  Nothing happens when any of the pieces is missing.
        """
        preview = container.find('div', attrs={'class': 'preview'})
        if preview is None or preview.div is None:
            return
        style = preview.div.get('style')
        if not style:
            return
        # Skip the first 16 characters -- presumably "background:url('";
        # NOTE(review): confirm against the live markup.
        path = style[16:]
        end = path.find("')")
        if end != -1:  # a missing terminator must not chop the last character
            path = path[:end]
        preview.name = 'img'
        preview['src'] = self.INDEX + path

    def append_page(self, soup, appendtag):
        """Append the ``div.body`` of every follow-up page to ``appendtag``.

        Pages are discovered through the link that wraps ``span.next``.  The
        ``div.k_ster`` element is removed from ``appendtag`` whenever it is
        present, both while paging and once at the end.
        """
        next_marker = soup.find('span', attrs={'class': 'next'})
        visited = set()
        while next_marker is not None:
            link = next_marker.parent
            href = link.get('href') if link is not None else None
            if not href:
                break
            page_url = self.INDEX + href
            if page_url in visited:  # never follow a cyclic "next" link forever
                break
            visited.add(page_url)
            page_soup = self.index_to_soup(page_url)
            next_marker = page_soup.find('span', attrs={'class': 'next'})
            page_body = page_soup.find(name='div', attrs={'class': 'body'})
            self._drop_first(appendtag, 'div', 'k_ster')
            if page_body is not None:
                appendtag.insert(len(appendtag.contents), page_body)
        self._drop_first(appendtag, 'div', 'k_ster')

    def image_article(self, soup, appendtag):
        """Flatten an image gallery into ``appendtag``.

        The preview of the first page is converted to an ``<img>``; then every
        page reachable through ``a.move_next`` is downloaded, its preview is
        converted the same way, and its ``div.gallery`` (minus title, thumbs,
        rating panel and navigation links) is appended.
        """
        preview = soup.find('div', attrs={'class': 'preview'})
        if preview is None:
            return
        next_link = preview.find('a', attrs={'class': 'move_next'})
        self._preview_to_img(appendtag)
        # A single-image gallery has no "next" link, so this must be guarded.
        self._drop_first(appendtag, 'a', 'move_next')

        unwanted = (
            ('div', 'title'),
            ('div', 'thumb'),
            ('div', 'panelOcenaObserwowane'),
            ('a', 'move_next'),
            ('a', 'move_back'),
        )
        visited = set()
        while next_link is not None:
            href = next_link.get('href')
            if not href:
                break
            page_url = self.INDEX + href
            if page_url in visited:  # never follow a cyclic "next" link forever
                break
            visited.add(page_url)
            page_soup = self.index_to_soup(page_url)
            next_link = page_soup.find('a', attrs={'class': 'move_next'})
            self._preview_to_img(page_soup)
            gallery = page_soup.find('div', attrs={'class': 'gallery'})
            if gallery is None:
                continue
            for name, css_class in unwanted:
                self._drop_first(gallery, name, css_class)
            appendtag.insert(len(appendtag.contents), gallery)

    def preprocess_html(self, soup):
        """Merge gallery or multi-page content into ``soup`` and return it."""
        if soup.find('div', attrs={'class': 'preview'}) is not None:
            self.image_article(soup, soup.body)
        else:
            self.append_page(soup, soup.body)
        return soup
|
40
recipes/cgm_pl.recipe
Normal file
40
recipes/cgm_pl.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class CGM(BasicNewsRecipe):
    """Calibre recipe for cgm.pl (Codzienna Gazeta Muzyczna, music news)."""

    title = u'CGM'
    oldest_article = 7
    __author__ = 'fenuks'
    description = u'Codzienna Gazeta Muzyczna'
    cover_url = 'http://www.krafcy.com/foto/tinymce/Image/cgm%281%29.jpg'
    category = 'music'
    language = 'pl'
    use_embedded_content = False
    max_articles_per_feed = 100
    # Was misspelled "no_stylesheers", which silently left the option unset.
    no_stylesheets = True
    extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;}'
    remove_tags_before = dict(id='mainContent')
    remove_tags_after = dict(name='div', attrs={'class': 'fbContainer'})
    remove_tags = [
        dict(name='div', attrs={'class': 'fbContainer'}),
        dict(name='p', attrs={'class': ['tagCloud', 'galleryAuthor']}),
        dict(id=['movieShare', 'container']),
    ]
    feeds = [
        (u'Informacje', u'http://www.cgm.pl/rss.xml'),
        (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
        (u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml'),
    ]

    # An <img> whose src contains any of these fragments is removed from the
    # article in preprocess_html.
    _UNWANTED_IMAGE_FRAGMENTS = (
        'article_photos/5841.jpg',
        '_vault/_article_photos/5807.jpg',
        'article_photos/5825.jpg',
        '_article_photos/5920.jpg',
        '_article_photos/5919.jpg',
        '_article_photos/5918.jpg',
        '_article_photos/5914.jpg',
        '_article_photos/5911.jpg',
        '_article_photos/5923.jpg',
        '_article_photos/5921.jpg',
    )

    def preprocess_html(self, soup):
        """Remove the listed images and replace the Flash gallery with an image.

        Returns the same ``soup`` object after modifying it in place.
        """
        for image in soup.findAll('img'):
            src = image.get('src') or ''  # an <img> without src must not raise
            if any(fragment in src for fragment in self._UNWANTED_IMAGE_FRAGMENTS):
                # Extract the tag itself; looking it up again with list.index()
                # is quadratic and may select a different, equal-comparing tag.
                image.extract()

        gallery = soup.find('div', attrs={'class': 'galleryFlash'})
        if gallery is not None:
            embed = gallery.find('embed')
            embed_src = embed.get('src') if embed is not None else None
            if embed_src:
                # The first 35 characters of the embed src are dropped and the
                # remainder is appended to the photo directory.
                # NOTE(review): presumably that prefix is the Flash player URL.
                photo_url = 'http://www.cgm.pl/_vault/_gallery/_photo/' + embed_src[35:]
                for param in gallery.findAll(name='param'):
                    param.extract()
                if len(gallery.contents) > 1:
                    # NOTE(review): assumes the second child is a tag, as the
                    # original code did -- confirm against the live markup.
                    placeholder = gallery.contents[1]
                    placeholder.name = 'img'
                    placeholder['src'] = photo_url
        return soup
|
17
recipes/dzieje_pl.recipe
Normal file
17
recipes/dzieje_pl.recipe
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Dzieje(BasicNewsRecipe):
    """Calibre recipe for dzieje.pl (history of Poland)."""

    # Catalogue metadata
    title = u'dzieje.pl'
    description = 'Dzieje - history of Poland'
    category = 'history'
    language = 'pl'
    __author__ = 'fenuks'
    cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'

    # Download limits
    oldest_article = 8
    max_articles_per_feed = 100

    # Page clean-up: the kept span runs from the h1.title heading to the
    # element with id "dogory", and that element is listed for removal too.
    no_stylesheets = True
    remove_javascript = True
    remove_tags_before = {'name': 'h1', 'attrs': {'class': 'title'}}
    remove_tags_after = {'id': 'dogory'}
    remove_tags = [{'id': 'dogory'}]

    feeds = [
        (u'Dzieje', u'http://dzieje.pl/rss.xml'),
    ]
|
13
recipes/greenlinux_pl.recipe
Normal file
13
recipes/greenlinux_pl.recipe
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class GreenLinux(BasicNewsRecipe):
    """Calibre recipe for GreenLinux.pl; article clean-up is left to auto_cleanup."""

    # Catalogue metadata
    title = u'GreenLinux.pl'
    category = 'IT'
    language = 'pl'
    __author__ = 'fenuks'
    cover_url = 'http://lh5.ggpht.com/_xd_6Y9kXhEc/S8tjyqlfhfI/AAAAAAAAAYU/zFNTp07ZQko/top.png'

    # Download limits
    oldest_article = 15
    max_articles_per_feed = 100

    # No hand-written tag filters: calibre's automatic clean-up is enabled.
    auto_cleanup = True

    feeds = [
        (u'Newsy', u'http://feeds.feedburner.com/greenlinux'),
    ]
|
11
recipes/historia_pl.recipe
Normal file
11
recipes/historia_pl.recipe
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Import the base class explicitly, as every sibling recipe does, so this file
# does not depend on the name being injected by whatever loads the recipe.
from calibre.web.feeds.news import BasicNewsRecipe


class Historia_org_pl(BasicNewsRecipe):
    """Calibre recipe for Historia.org.pl (history site), fed from one RSS feed."""

    title = u'Historia.org.pl'
    __author__ = 'fenuks'
    description = u'history site'
    cover_url = 'http://lh3.googleusercontent.com/_QeRQus12wGg/TOvHsZ2GN7I/AAAAAAAAD_o/LY1JZDnq7ro/logo5.jpg'
    category = 'history'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100

    feeds = [(u'Artykuły', u'http://www.historia.org.pl/index.php?format=feed&type=rss')]
|
BIN
recipes/icons/benchmark_pl.png
Normal file
BIN
recipes/icons/benchmark_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 658 B |
BIN
recipes/icons/cgm_pl.png
Normal file
BIN
recipes/icons/cgm_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 837 B |
BIN
recipes/icons/dzieje_pl.png
Normal file
BIN
recipes/icons/dzieje_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 642 B |
BIN
recipes/icons/greenlinux_pl.png
Normal file
BIN
recipes/icons/greenlinux_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 648 B |
BIN
recipes/icons/historia_pl.png
Normal file
BIN
recipes/icons/historia_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 806 B |
BIN
recipes/icons/lomza.png
Normal file
BIN
recipes/icons/lomza.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.0 KiB |
28
recipes/ksiazka_pl.recipe
Normal file
28
recipes/ksiazka_pl.recipe
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
class Ksiazka_net_pl(BasicNewsRecipe):
    """Calibre recipe for ksiazka.net.pl (book industry portal)."""

    title = u'ksiazka.net.pl'
    __author__ = 'fenuks'
    description = u'Ksiazka.net.pl - book vortal'
    cover_url = 'http://www.ksiazka.net.pl/fileadmin/templates/ksiazka.net.pl/images/1PortalKsiegarski-logo.jpg'
    category = 'books'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    #extra_css = 'img {float: right;}'

    # Replace the "Podoba mi się, kupuję:" ("I like it, I'm buying:") caption
    # with a line break.  A plain u'' literal is used because the ``ur`` prefix
    # is a syntax error on Python 3; the pattern contains no backslashes, so
    # the compiled regex is unchanged.
    preprocess_regexps = [
        (re.compile(u'Podoba mi się, kupuję:'), lambda match: '<br />'),
    ]
    remove_tags_before = dict(name='div', attrs={'class': 'm-body'})
    remove_tags_after = dict(name='div', attrs={'class': 'm-body-link'})
    remove_tags = [dict(attrs={'class': ['mk_library-icon', 'm-body-link', 'tagi']})]
    feeds = [
        (u'Wiadomości', u'http://www.ksiazka.net.pl/?id=wiadomosci&type=100'),
        (u'Książki', u'http://www.ksiazka.net.pl/?id=ksiazki&type=100'),
        (u'Rynek', u'http://www.ksiazka.net.pl/?id=rynek&type=100'),
    ]

    def image_url_processor(self, baseurl, url):
        """Return an absolute URL for an image reference found in an article.

        * ``file://`` URLs that do not mention the site host are re-rooted at
          the site after their first 8 characters are dropped.
        * URLs containing neither ``http://`` nor ``https://`` are treated as
          site-relative and prefixed with the site root.
        * Everything else is returned unchanged.

        ``baseurl`` is accepted for interface compatibility and is not used.
        """
        site_root = 'http://www.ksiazka.net.pl/'
        if 'file://' in url and 'www.ksiazka.net.pl/' not in url:
            # NOTE(review): url[8:] presumably strips a leading "file:///".
            return site_root + url[8:]
        # The original only tested for 'http://', which mangled absolute
        # https:// URLs by prefixing the site root onto them.
        if 'http://' not in url and 'https://' not in url:
            return site_root + url
        return url
|
14
recipes/lomza.recipe
Normal file
14
recipes/lomza.recipe
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Lomza(BasicNewsRecipe):
    """Calibre recipe for 4lomza.pl (regional news site for Łomża)."""

    title = u'4Lomza'
    __author__ = 'fenuks'
    description = u'4Łomża - regional site'
    cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
    language = 'pl'
    oldest_article = 15
    # Was misspelled "no_styleseets", which silently left the option unset.
    no_stylesheets = True
    max_articles_per_feed = 100
    # Keep only the article container, minus banner / print / send-to-a-friend boxes.
    remove_tags = [dict(name='div', attrs={'class': ['bxbanner', 'drukuj', 'wyslijznajomemu']})]
    keep_only_tags = [dict(name='div', attrs={'class': 'wiadomosc'})]
    feeds = [(u'Łomża', u'http://feeds.feedburner.com/4lomza.pl')]
|
12
recipes/tablety_pl.recipe
Normal file
12
recipes/tablety_pl.recipe
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Tablety_pl(BasicNewsRecipe):
    """Calibre recipe for Tablety.pl (tablet news), fed from the site's main feed."""

    # Catalogue metadata
    title = u'Tablety.pl'
    description = u'tablety.pl - latest tablet news'
    category = 'IT'
    language = 'pl'
    __author__ = 'fenuks'
    cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'

    # Download limits
    oldest_article = 8
    max_articles_per_feed = 100

    feeds = [
        (u'Najnowsze posty', u'http://www.tablety.pl/feed/'),
    ]
|
Loading…
x
Reference in New Issue
Block a user