calibre/recipes/adventure_zone_pl.recipe
Tomasz Długosz 78cc12fe0e minor fixes
2013-04-02 00:16:33 +02:00

69 lines
3.4 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from calibre.web.feeds.news import BasicNewsRecipe
import re
class Adventure_zone(BasicNewsRecipe):
title = u'Adventure Zone'
__author__ = 'fenuks'
description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
category = 'games'
language = 'pl'
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
index = 'http://www.adventure-zone.info/fusion/'
use_embedded_content = False
preprocess_regexps = [(re.compile(r"<td class='capmain'>Komentarze</td>", re.IGNORECASE), lambda m: ''),
(re.compile(r'</?table.*?>'), lambda match: ''),
(re.compile(r'</?tbody.*?>'), lambda match: '')]
remove_tags_before = dict(name='td', attrs={'class':'main-bg'})
remove_tags = [dict(name='img', attrs={'alt':'Drukuj'})]
remove_tags_after = dict(id='comments')
extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; } img.news-category {float: left; margin-right: 5px;}'
feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')]
'''def get_cover_url(self):
soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php')
cover=soup.find(id='box_OstatninumerAZ')
self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src']
return getattr(self, 'cover_url', self.cover_url)'''
def populate_article_metadata(self, article, soup, first):
result = re.search('(.+) - Adventure Zone', soup.title.string)
if result:
result = result.group(1)
else:
result = soup.body.find('strong')
if result:
result = result.string
if result:
result = result.replace('&amp;', '&')
result = result.replace('&#39;', '')
article.title = result
def skip_ad_pages(self, soup):
skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'})
skip_tag = skip_tag.findAll(name='a')
title = soup.title.string.lower()
if (('zapowied' in title) or ('recenzj' in title) or ('solucj' in title) or ('poradnik' in title)):
for r in skip_tag:
if r.strong and r.strong.string:
word=r.strong.string.lower()
if (('zapowied' in word) or ('recenzj' in word) or ('solucj' in word) or ('poradnik' in word)):
return self.index_to_soup('http://www.adventure-zone.info/fusion/print.php?type=A&item'+r['href'][r['href'].find('article_id')+7:], raw=True)
def preprocess_html(self, soup):
footer=soup.find(attrs={'class':'news-footer middle-border'})
r = soup.find(name='td', attrs={'class':'capmain'})
if r:
r.name='h1'
for item in soup.findAll(name=['tr', 'td']):
item.name='div'
if footer and len(footer('a'))>=2:
footer('a')[1].extract()
for item in soup.findAll(style=True):
del item['style']
for a in soup('a'):
if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
a['href']=self.index + a['href']
return soup