mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated various Polish recipes
This commit is contained in:
parent
18cd783913
commit
015d45a06b
@ -9,6 +9,7 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
oldest_article = 20
|
||||
max_articles_per_feed = 100
|
||||
index='http://www.adventure-zone.info/fusion/'
|
||||
use_embedded_content=False
|
||||
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: '')]
|
||||
remove_tags_before= dict(name='td', attrs={'class':'main-bg'})
|
||||
@ -45,6 +46,19 @@ class Adventure_zone(BasicNewsRecipe):
|
||||
skip_tag = skip_tag.findAll(name='a')
|
||||
for r in skip_tag:
|
||||
if r.strong:
|
||||
word=r.strong.string
|
||||
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word)):
|
||||
word=r.strong.string.lower()
|
||||
if word and (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
|
||||
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
footer=soup.find(attrs={'class':'news-footer middle-border'})
|
||||
if footer and len(footer('a'))>=2:
|
||||
footer('a')[1].extract()
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
||||
|
||||
|
@ -68,4 +68,7 @@ class Benchmark_pl(BasicNewsRecipe):
|
||||
self.image_article(soup, soup.body)
|
||||
else:
|
||||
self.append_page(soup, soup.body)
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.INDEX + a['href']
|
||||
return soup
|
||||
|
@ -6,6 +6,7 @@ class CD_Action(BasicNewsRecipe):
|
||||
description = 'cdaction.pl - polish games magazine site'
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
index='http://www.cdaction.pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
@ -18,3 +19,9 @@ class CD_Action(BasicNewsRecipe):
|
||||
soup = self.index_to_soup('http://www.cdaction.pl/magazyn/')
|
||||
self.cover_url='http://www.cdaction.pl'+ soup.find(id='wspolnik').div.a['href']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -11,6 +11,7 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
cover_url = 'http://userlogos.org/files/logos/Karmody/dobreprogramy_01.png'
|
||||
description = u'Aktualności i blogi z dobreprogramy.pl'
|
||||
encoding = 'utf-8'
|
||||
index='http://www.dobreprogramy.pl/'
|
||||
no_stylesheets = True
|
||||
language = 'pl'
|
||||
extra_css = '.title {font-size:22px;}'
|
||||
@ -22,3 +23,10 @@ class Dobreprogramy_pl(BasicNewsRecipe):
|
||||
#remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
|
||||
feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
|
||||
('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -7,6 +7,7 @@ class Dzieje(BasicNewsRecipe):
|
||||
cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
|
||||
category = 'history'
|
||||
language = 'pl'
|
||||
index='http://dzieje.pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript=True
|
||||
@ -15,3 +16,10 @@ class Dzieje(BasicNewsRecipe):
|
||||
remove_tags_after= dict(id='dogory')
|
||||
remove_tags=[dict(id='dogory')]
|
||||
feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -21,3 +21,8 @@ class eioba(BasicNewsRecipe):
|
||||
(u'Rozrywka', u'http://www.eioba.pl/feed/categories/10.xml'),
|
||||
(u'Rożne', u'http://www.eioba.pl/feed/categories/9.xml')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
@ -7,6 +7,7 @@ class eMuzyka(BasicNewsRecipe):
|
||||
description = u'Emuzyka to największa i najpopularniejsza strona o muzyce w Polsce'
|
||||
category = 'music'
|
||||
language = 'pl'
|
||||
index='http://www.emuzyka.pl'
|
||||
cover_url='http://s.emuzyka.pl/img/emuzyka_invert_small.jpg'
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
@ -14,3 +15,9 @@ class eMuzyka(BasicNewsRecipe):
|
||||
keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
|
||||
remove_tags=[dict(name='span', attrs={'id':'date'})]
|
||||
feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -7,6 +7,7 @@ class Filmweb_pl(BasicNewsRecipe):
|
||||
cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
|
||||
category = 'movies'
|
||||
language = 'pl'
|
||||
index='http://www.filmweb.pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
@ -39,3 +40,9 @@ class Filmweb_pl(BasicNewsRecipe):
|
||||
self.log.warn(skip_tag)
|
||||
return self.index_to_soup(skip_tag['href'], raw=True)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -6,12 +6,14 @@ class Gameplay_pl(BasicNewsRecipe):
|
||||
description = u'gameplay.pl - serwis o naszych zainteresowaniach, grach, filmach, książkach, muzyce, fotografii i konsolach.'
|
||||
category = 'games, movies, books, music'
|
||||
language = 'pl'
|
||||
index='http://gameplay.pl'
|
||||
masthead_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||
cover_url= 'http://gameplay.pl/img/gpy_top_logo.png'
|
||||
max_articles_per_feed = 100
|
||||
remove_javascript= True
|
||||
no_stylesheets= True
|
||||
keep_only_tags=[dict(name='div', attrs={'class':['news_endpage_tit', 'news']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im']})]
|
||||
remove_tags=[dict(name='div', attrs={'class':['galeria', 'noedit center im', 'news_list', 'news_list_autor', 'stop_bot', 'tagi']}), dict(attrs={'usemap':'#map'})]
|
||||
feeds = [(u'Wiadomo\u015bci', u'http://gameplay.pl/rss/')]
|
||||
|
||||
def image_url_processor(self, baseurl, url):
|
||||
@ -19,3 +21,9 @@ class Gameplay_pl(BasicNewsRecipe):
|
||||
return 'http://gameplay.pl'+ url[2:]
|
||||
else:
|
||||
return url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and '../' in a['href']:
|
||||
a['href']=self.index + a['href'][2:]
|
||||
return soup
|
@ -9,6 +9,7 @@ class Gildia(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds=True
|
||||
no_stylesheets=True
|
||||
remove_tags=[dict(name='div', attrs={'class':'backlink'}), dict(name='div', attrs={'class':'im_img'}), dict(name='div', attrs={'class':'addthis_toolbox addthis_default_style'})]
|
||||
keep_only_tags=dict(name='div', attrs={'class':'widetext'})
|
||||
@ -24,3 +25,16 @@ class Gildia(BasicNewsRecipe):
|
||||
self.log.warn('odnosnik')
|
||||
self.log.warn(link['href'])
|
||||
return self.index_to_soup(link['href'], raw=True)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
if '/gry/' in a['href']:
|
||||
a['href']='http://www.gry.gildia.pl' + a['href']
|
||||
elif u'książk' in soup.title.string.lower() or u'komiks' in soup.title.string.lower():
|
||||
a['href']='http://www.literatura.gildia.pl' + a['href']
|
||||
elif u'komiks' in soup.title.string.lower():
|
||||
a['href']='http://www.literatura.gildia.pl' + a['href']
|
||||
else:
|
||||
a['href']='http://www.gildia.pl' + a['href']
|
||||
return soup
|
||||
|
@ -7,6 +7,7 @@ class Gram_pl(BasicNewsRecipe):
|
||||
category = 'games'
|
||||
language = 'pl'
|
||||
oldest_article = 8
|
||||
index='http://www.gram.pl'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets= True
|
||||
extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
|
||||
@ -52,4 +53,7 @@ class Gram_pl(BasicNewsRecipe):
|
||||
tag=soup.findAll(name='div', attrs={'class':'picbox'})
|
||||
for t in tag:
|
||||
t['style']='float: left;'
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -8,6 +8,7 @@ class in4(BasicNewsRecipe):
|
||||
description = u'Serwis Informacyjny - Aktualnosci, recenzje'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
index='http://www.in4.pl/'
|
||||
#cover_url= 'http://www.in4.pl/recenzje/337/in4pl.jpg'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
@ -39,6 +40,7 @@ class in4(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ class INFRA(BasicNewsRecipe):
|
||||
description = u'Serwis Informacyjny INFRA - UFO, Zjawiska Paranormalne, Duchy, Tajemnice świata.'
|
||||
cover_url = 'http://npn.nazwa.pl/templates/ja_teline_ii/images/logo.jpg'
|
||||
category = 'UFO'
|
||||
index='http://infra.org.pl'
|
||||
language = 'pl'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheers=True
|
||||
@ -15,3 +16,11 @@ class INFRA(BasicNewsRecipe):
|
||||
remove_tags_after=dict(attrs={'class':'pagenav'})
|
||||
remove_tags=[dict(attrs={'class':'pagenav'})]
|
||||
feeds = [(u'Najnowsze wiadomo\u015bci', u'http://www.infra.org.pl/index.php?option=com_rd_rss&id=1')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -10,6 +10,23 @@ class Konflikty(BasicNewsRecipe):
|
||||
category='military, history'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
no_stylesheets = True
|
||||
keep_only_tags=[dict(attrs={'class':['title1', 'image']}), dict(id='body')]
|
||||
|
||||
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'), (u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'), (u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'), (u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml')]
|
||||
feeds = [(u'Aktualności', u'http://www.konflikty.pl/rss_aktualnosci_10.xml'),
|
||||
(u'Artyku\u0142y', u'http://www.konflikty.pl/rss_artykuly_10.xml'),
|
||||
(u'Historia', u'http://www.konflikty.pl/rss_historia_10.xml'),
|
||||
(u'Militaria', u'http://www.konflikty.pl/rss_militaria_10.xml'),
|
||||
(u'Relacje', u'http://www.konflikty.pl/rss_relacje_10.xml'),
|
||||
(u'Recenzje', u'http://www.konflikty.pl/rss_recenzje_10.xml'),
|
||||
(u'Teksty źródłowe', u'http://www.konflikty.pl/rss_tekstyzrodlowe_10.xml')]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for image in soup.findAll(name='a', attrs={'class':'image'}):
|
||||
if image.img and image.img.has_key('alt'):
|
||||
image.name='div'
|
||||
pos = len(image.contents)
|
||||
image.insert(pos, BeautifulSoup('<p style="font-style:italic;">'+image.img['alt']+'</p>'))
|
||||
return soup
|
@ -9,8 +9,9 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class recipeMagic(BasicNewsRecipe):
|
||||
title = 'National Geographic PL'
|
||||
__author__ = 'Marcin Urban 2011'
|
||||
__modified_by__ = 'fenuks'
|
||||
description = 'legenda wśród magazynów z historią sięgającą 120 lat'
|
||||
cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
||||
#cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
@ -42,11 +43,43 @@ class recipeMagic(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
feeds=[]
|
||||
|
||||
feeds = [
|
||||
('National Geographic PL', 'http://www.national-geographic.pl/rss/'),
|
||||
]
|
||||
def find_articles(self, url):
|
||||
articles = []
|
||||
soup=self.index_to_soup(url)
|
||||
tag=soup.find(attrs={'class':'arl'})
|
||||
art=tag.ul.findAll('li')
|
||||
for i in art:
|
||||
title=i.a['title']
|
||||
url=i.a['href']
|
||||
#date=soup.find(id='footer').ul.li.string[41:-1]
|
||||
desc=i.div.p.string
|
||||
articles.append({'title' : title,
|
||||
'url' : url,
|
||||
'date' : '',
|
||||
'description' : desc
|
||||
})
|
||||
return articles
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
feeds.append((u"Aktualności", self.find_articles('http://www.national-geographic.pl/aktualnosci/')))
|
||||
feeds.append((u"Artykuły", self.find_articles('http://www.national-geographic.pl/artykuly/')))
|
||||
|
||||
return feeds
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('artykuly0Cpokaz', 'drukuj-artykul')
|
||||
if 'artykuly' in url:
|
||||
return url.replace('artykuly/pokaz', 'drukuj-artykul')
|
||||
elif 'aktualnosci' in url:
|
||||
return url.replace('aktualnosci/pokaz', 'drukuj-artykul')
|
||||
else:
|
||||
return url
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.national-geographic.pl/biezace-wydania/')
|
||||
tag=soup.find(attrs={'class':'txt jus'})
|
||||
self.cover_url=tag.img['src']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
||||
|
||||
|
@ -81,5 +81,7 @@ class Nowa_Fantastyka(BasicNewsRecipe):
|
||||
title=soup.find(attrs={'class':'tytul'})
|
||||
if title:
|
||||
title['style']='font-size: 20px; font-weight: bold;'
|
||||
self.log.warn(soup)
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.INDEX + a['href']
|
||||
return soup
|
||||
|
@ -7,6 +7,7 @@ class PC_Arena(BasicNewsRecipe):
|
||||
description = u'Najnowsze informacje z branży IT - testy, recenzje, aktualności, rankingi, wywiady. Twoje źródło informacji o sprzęcie komputerowym.'
|
||||
category = 'IT'
|
||||
language = 'pl'
|
||||
index='http://pcarena.pl'
|
||||
masthead_url='http://pcarena.pl/pcarena/img/logo.png'
|
||||
cover_url= 'http://pcarena.pl/pcarena/img/logo.png'
|
||||
no_stylesheets = True
|
||||
@ -23,3 +24,9 @@ class PC_Arena(BasicNewsRecipe):
|
||||
return 'http://pcarena.pl' + url
|
||||
else:
|
||||
return url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
@ -34,4 +34,12 @@ class tanuki(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
if 'tanuki-anime' in soup.title.string.lower():
|
||||
a['href']='http://anime.tanuki.pl' + a['href']
|
||||
elif 'tanuki-manga' in soup.title.string.lower():
|
||||
a['href']='http://manga.tanuki.pl' + a['href']
|
||||
elif 'tanuki-czytelnia' in soup.title.string.lower():
|
||||
a['href']='http://czytelnia.tanuki.pl' + a['href']
|
||||
return soup
|
@ -8,6 +8,7 @@ class webhosting_pl(BasicNewsRecipe):
|
||||
cover_url='http://webhosting.pl/images/logo.png'
|
||||
masthead_url='http://webhosting.pl/images/logo.png'
|
||||
oldest_article = 7
|
||||
index='http://webhosting.pl'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
@ -37,3 +38,9 @@ class webhosting_pl(BasicNewsRecipe):
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('webhosting.pl', 'webhosting.pl/print')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for a in soup('a'):
|
||||
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
|
||||
a['href']=self.index + a['href']
|
||||
return soup
|
Loading…
x
Reference in New Issue
Block a user