mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Various Polish news sources by fenuks
This commit is contained in:
parent
2d3cdfedf3
commit
ab79b30dd3
38
recipes/adventure_zone_pl.recipe
Normal file
38
recipes/adventure_zone_pl.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Adventure_zone(BasicNewsRecipe):
    """Recipe for Adventure Zone (adventure-zone.info), language 'pl'.

    News items come from the site's news feed and are fetched through
    ``print.php``.  Feed entries that are only teasers linking to a full
    article (preview/review) are swapped for the article's print page by
    :meth:`skip_ad_pages`.
    """

    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = 'Adventure zone - adventure games from A to Z'
    category = 'games'
    language = 'pl'
    oldest_article = 15
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_tags_before = dict(name='td', attrs={'class': 'main-bg'})
    remove_tags_after = dict(name='td', attrs={'class': 'main-body middle-border'})
    extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }'
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]

    def get_cover_url(self):
        """Return the cover image URL scraped from the site's news page.

        The image is looked up inside the element with id
        ``box_OstatninumerAZ``.  If that element (or the nested
        ``center > a > img[src]`` chain) is missing, the previously
        configured ``cover_url`` (``None`` if there is none) is returned
        instead of raising.
        """
        soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
        cover = soup.find(id='box_OstatninumerAZ')
        try:
            src = cover.center.a.img['src']
        except (AttributeError, KeyError, TypeError):
            # The page layout changed or the box is absent: fall back to
            # whatever cover_url is already set rather than aborting.
            return getattr(self, 'cover_url', None)
        self.cover_url = 'http://www.adventure-zone.info/fusion/' + src
        return self.cover_url

    def skip_ad_pages(self, soup):
        """Replace a teaser page with the print view of the linked article.

        Scans every ``<a>`` in the page body for a link to
        ``articles.php?`` whose ``<strong>`` label contains 'zapowied' or
        'recenzj'.  For the first match, the raw HTML of the matching
        ``print.php?type=A`` page is returned; otherwise ``None``.
        """
        body = soup.body
        if body is None:
            return None
        for link in body.findAll(name='a'):
            # Anchors without an href (named anchors) must not raise.
            href = link.get('href') or ''
            if 'articles.php?' not in href or link.strong is None:
                continue
            # .string is None when <strong> holds nested markup.
            word = link.strong.string or ''
            # Each keyword has to be tested on its own:
            # ('zapowied' or 'recenzj') evaluates to just 'zapowied'.
            if 'zapowied' not in word and 'recenzj' not in word:
                continue
            id_pos = href.find('_id')
            if id_pos == -1:
                # No article id in the link, so no print URL can be built.
                continue
            # Keep everything after '_id' (i.e. '=<number>...') and attach
            # it to the print page's 'item_id' parameter.
            return self.index_to_soup(
                'http://www.adventure-zone.info/fusion/print.php?type=A&item_id'
                + href[id_pos + 3:],
                raw=True)
        return None

    def print_version(self, url):
        """Map a news 'readmore' URL to its print-view URL."""
        return url.replace('news.php?readmore', 'print.php?type=N&item_id')
|
||||
|
18
recipes/astro_news_pl.recipe
Normal file
18
recipes/astro_news_pl.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AstroNEWS(BasicNewsRecipe):
    """Recipe for AstroNEWS (news.astronet.pl), language 'pl'.

    Articles are fetched through the site's ``print.cgi`` view and
    cleaned with ``auto_cleanup``.
    """

    # --- Identification -------------------------------------------------
    title = u'AstroNEWS'
    description = 'AstroNEWS- astronomy every day'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'astronomy, science'
    cover_url = 'http://news.astronet.pl/img/logo_news.jpg'

    # --- Fetch limits and clean-up --------------------------------------
    # no_stylesheets is deliberately left unset (it was commented out by
    # the recipe author).
    oldest_article = 8
    max_articles_per_feed = 100
    auto_cleanup = True

    # --- Feeds ----------------------------------------------------------
    feeds = [
        (u'Wiadomości', u'http://news.astronet.pl/rss.cgi'),
    ]

    def print_version(self, url):
        """Return *url* rewritten to point at the ``print.cgi`` view."""
        site_root = 'astronet.pl/'
        printable_root = site_root + 'print.cgi?'
        return url.replace(site_root, printable_root)
|
||||
|
15
recipes/astronomia_pl.recipe
Normal file
15
recipes/astronomia_pl.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Astronomia_pl(BasicNewsRecipe):
    """Recipe for Astronomia.pl, language 'pl'.

    Purely declarative: only the ``div`` elements with id ``a1`` or
    ``h2`` are selected by ``keep_only_tags``.
    """

    # --- Identification -------------------------------------------------
    title = u'Astronomia.pl'
    description = 'Astronomia - polish astronomy site'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'astronomy, science'
    cover_url = 'http://www.astronomia.pl/grafika/logo.gif'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 8
    max_articles_per_feed = 100

    # --- Content clean-up -----------------------------------------------
    # no_stylesheets is deliberately left unset (it was commented out by
    # the recipe author).
    remove_tags_before = {'name': 'div', 'attrs': {'id': 'a1'}}
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['a1', 'h2']}},
    ]

    # --- Feeds ----------------------------------------------------------
    feeds = [
        (u'Wiadomości z astronomii i astronautyki', u'http://www.astronomia.pl/rss/'),
    ]
|
15
recipes/elektroda_pl.recipe
Normal file
15
recipes/elektroda_pl.recipe
Normal file
@ -0,0 +1,15 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Elektroda(BasicNewsRecipe):
    """Recipe for the Elektroda.pl forum RSS feed, language 'pl'.

    Purely declarative: content is bounded by the ``span.postbody`` and
    ``td.spaceRow`` elements, and links to ``#top`` are removed.
    """

    # --- Identification -------------------------------------------------
    title = u'Elektroda'
    description = 'Elektroda.pl'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'electronics'
    cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 8
    max_articles_per_feed = 100

    # --- Content clean-up -----------------------------------------------
    remove_tags_before = {'name': 'span', 'attrs': {'class': 'postbody'}}
    remove_tags_after = {'name': 'td', 'attrs': {'class': 'spaceRow'}}
    remove_tags = [
        {'name': 'a', 'attrs': {'href': '#top'}},
    ]

    # --- Feeds ----------------------------------------------------------
    feeds = [
        (u'Elektroda', u'http://www.elektroda.pl/rtvforum/rss.php'),
    ]
|
26
recipes/gildia_pl.recipe
Normal file
26
recipes/gildia_pl.recipe
Normal file
@ -0,0 +1,26 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Gildia(BasicNewsRecipe):
    """Recipe for Gildia.pl and its thematic sub-sites, language 'pl'.

    Each sub-site contributes one RSS feed.  News entries that merely
    link to a review ('recenzja') are replaced by the linked page in
    :meth:`skip_ad_pages`.
    """

    title = u'Gildia.pl'
    __author__ = 'fenuks'
    description = 'Gildia - cultural site'
    cover_url = 'http://www.film.gildia.pl/_n_/portal/redakcja/logo/logo-gildia.pl-500.jpg'
    category = 'culture'
    language = 'pl'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_tags = [
        dict(name='div', attrs={'class': 'backlink'}),
        dict(name='div', attrs={'class': 'im_img'}),
        dict(name='div', attrs={'class': 'addthis_toolbox addthis_default_style'}),
    ]
    # A list of tag specs, matching the form used for remove_tags above
    # (the original assigned a bare dict here).
    keep_only_tags = [dict(name='div', attrs={'class': 'widetext'})]
    feeds = [
        (u'Gry', u'http://www.gry.gildia.pl/rss'),
        (u'Literatura', u'http://www.literatura.gildia.pl/rss'),
        (u'Film', u'http://www.film.gildia.pl/rss'),
        (u'Horror', u'http://www.horror.gildia.pl/rss'),
        (u'Konwenty', u'http://www.konwenty.gildia.pl/rss'),
        (u'Plansz\xf3wki', u'http://www.planszowki.gildia.pl/rss'),
        (u'Manga i anime', u'http://www.manga.gildia.pl/rss'),
        (u'Star Wars', u'http://www.starwars.gildia.pl/rss'),
        (u'Techno', u'http://www.techno.gildia.pl/rss'),
        (u'Historia', u'http://www.historia.gildia.pl/rss'),
        (u'Magia', u'http://www.magia.gildia.pl/rss'),
        (u'Bitewniaki', u'http://www.bitewniaki.gildia.pl/rss'),
        (u'RPG', u'http://www.rpg.gildia.pl/rss'),
        (u'LARP', u'http://www.larp.gildia.pl/rss'),
        (u'Muzyka', u'http://www.muzyka.gildia.pl/rss'),
        (u'Nauka', u'http://www.nauka.gildia.pl/rss'),
    ]

    def skip_ad_pages(self, soup):
        """Follow a news entry's link to the full review, if there is one.

        Looks inside ``div.news`` for the first ``<a>`` whose href
        contains 'recenzja' and returns the raw HTML of that target.
        Returns ``None`` when the page has no ``div.news`` or no such
        link.
        """
        content = soup.find('div', attrs={'class': 'news'})
        if content is None:
            # Not a news teaser page; there is nothing to skip.
            return None
        for link in content.findAll(name='a'):
            # Anchors without an href must not raise.
            href = link.get('href')
            if href and 'recenzja' in href:
                self.log.warn('odnosnik')
                self.log.warn(href)
                return self.index_to_soup(href, raw=True)
        return None
|
38
recipes/gry_online_pl.recipe
Normal file
38
recipes/gry_online_pl.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class Gry_online_pl(BasicNewsRecipe):
    """Recipe for Gry-Online.pl, language 'pl'.

    Multi-page articles are stitched together: :meth:`preprocess_html`
    calls :meth:`append_page`, which follows the 'num_str_nex' pager
    link and appends the text paragraphs of each following page.
    """

    title = u'Gry-Online.pl'
    __author__ = 'fenuks'
    description = 'Gry-Online.pl - computer games'
    category = 'games'
    language = 'pl'
    oldest_article = 13
    # Prefix used to build the pager links followed in append_page().
    INDEX = 'http://www.gry-online.pl/'
    cover_url = 'http://www.gry-online.pl/img/1st_10/1st-gol-logo.png'
    max_articles_per_feed = 100
    no_stylesheets = True
    extra_css = 'p.wn1{font-size:22px;}'
    remove_tags_after = [dict(name='div', attrs={'class': ['tresc-newsa']})]
    # The pager link is kept here so append_page() can find it.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['txthead']}),
        dict(name='p', attrs={'class': ['wtx1', 'wn1', 'wob']}),
        dict(name='a', attrs={'class': ['num_str_nex']}),
    ]
    # Left disabled by the recipe author:
    # remove_tags = [dict(name='div', attrs={'class': ['news_plat']})]
    feeds = [
        (u'Newsy', 'http://www.gry-online.pl/rss/news.xml'),
        ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml'),
    ]

    def append_page(self, soup, appendtag):
        """Append the paragraphs of all following pages to *appendtag*.

        Starting from *soup*, finds the ``a.num_str_nex`` pager link.
        While that link's ``<div>`` label contains 'strona', the linked
        page is fetched and its ``p.wtx1`` / ``p.wn1`` / ``p.wob``
        elements are moved to the end of *appendtag*.  Any pager link
        found inside *appendtag* is replaced by a newline.

        The walk is a loop rather than recursion, so article length is
        not limited by the recursion depth.
        """
        pager_attrs = {'class': 'num_str_nex'}
        while True:
            nexturl = soup.find('a', attrs=pager_attrs)
            # Strip the pager from the output.  nexturl stays usable
            # after being detached from the tree.
            stale_pager = appendtag.find('a', attrs=pager_attrs)
            if stale_pager is not None:
                stale_pager.replaceWith('\n')
            if nexturl is None:
                return
            # The link may lack a <div>, or the div may hold nested
            # markup (.string is None); both mean "no next page".
            label = nexturl.div.string if nexturl.div is not None else None
            if not label or 'strona' not in label:
                return
            href = nexturl.get('href')
            if not href:
                return
            soup = self.index_to_soup(self.INDEX + href)
            pagetext = soup.findAll(name='p', attrs={'class': ['wtx1', 'wn1', 'wob']})
            for tag in pagetext:
                appendtag.insert(len(appendtag.contents), tag)

    def preprocess_html(self, soup):
        """Merge all pages of the article into *soup* and return it."""
        self.append_page(soup, soup.body)
        return soup
|
BIN
recipes/icons/adventure_zone_pl.png
Normal file
BIN
recipes/icons/adventure_zone_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.6 KiB |
BIN
recipes/icons/astro_news_pl.png
Normal file
BIN
recipes/icons/astro_news_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 625 B |
BIN
recipes/icons/astronomia_pl.png
Normal file
BIN
recipes/icons/astronomia_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 389 B |
BIN
recipes/icons/elektroda_pl.png
Normal file
BIN
recipes/icons/elektroda_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1023 B |
BIN
recipes/icons/gry_online_pl.png
Normal file
BIN
recipes/icons/gry_online_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 249 B |
BIN
recipes/icons/ubuntu_pl.png
Normal file
BIN
recipes/icons/ubuntu_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 508 B |
16
recipes/ubuntu_pl.recipe
Normal file
16
recipes/ubuntu_pl.recipe
Normal file
@ -0,0 +1,16 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ubuntu_pl(BasicNewsRecipe):
    """Recipe for UBUNTU.pl, language 'pl'.

    Purely declarative: two FeedBurner feeds, with content narrowed to
    the teaser cell, entry title and entry content elements.
    """

    # --- Identification -------------------------------------------------
    title = u'UBUNTU.pl'
    description = 'UBUNTU.pl - polish ubuntu community site'
    __author__ = 'fenuks'
    language = 'pl'
    category = 'linux, IT'
    cover_url = 'http://ubuntu.pl/img/logo.jpg'

    # --- Fetch limits ---------------------------------------------------
    oldest_article = 8
    max_articles_per_feed = 100

    # --- Content clean-up and styling -----------------------------------
    no_stylesheets = True
    extra_css = '#main {text-align:left;}'
    keep_only_tags = [
        {'name': 'td', 'attrs': {'class': 'teaser-node-mc'}},
        {'name': 'h3', 'attrs': {'class': 'entry-title'}},
        {'name': 'div', 'attrs': {'class': 'entry-content'}},
    ]
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': 'content'}},
    ]

    # --- Feeds ----------------------------------------------------------
    feeds = [
        ('Czytelnia Ubuntu', 'http://feeds.feedburner.com/ubuntu-czytelnia'),
        (u'WikiGames', u'http://feeds.feedburner.com/WikiGames'),
    ]
|
Loading…
x
Reference in New Issue
Block a user