from calibre.web.feeds.news import BasicNewsRecipe


class Adventure_zone(BasicNewsRecipe):
    """Recipe that downloads the 'Nowinki' news feed of adventure-zone.info.

    Feed entries whose page title mentions a preview, review, walkthrough or
    guide are treated as teaser pages: ``skip_ad_pages`` follows the matching
    link inside the content area and hands back the linked page instead.
    """

    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
    category = 'games'
    language = 'pl'
    # Prefix used to turn the site's relative links into absolute URLs.
    BASEURL = 'http://www.adventure-zone.info/fusion/'
    no_stylesheets = True
    extra_css = '.image {float: left; margin-right: 5px;}'
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
    remove_attributes = ['style']
    use_embedded_content = False
    keep_only_tags = [dict(attrs={'class': 'content'})]
    remove_tags = [dict(attrs={'class': 'footer'})]
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]

    # Lower-case word stems that mark a preview / review / walkthrough /
    # guide, matched as substrings of page titles and link captions.
    _SECTION_KEYWORDS = ('zapowied', 'recenzj', 'solucj', 'poradnik')

    def _mentions_section(self, text):
        """Return True if lower-cased *text* contains any section keyword."""
        return any(keyword in text for keyword in self._SECTION_KEYWORDS)

    def _absolute_url(self, href):
        """Return *href* unchanged if it starts with 'http', else prefix BASEURL."""
        if href.startswith('http'):
            return href
        return self.BASEURL + href

    def skip_ad_pages(self, soup):
        """Follow a teaser page through to the article it links to.

        When the page title contains one of the section keywords, scan the
        anchors inside the ``class="content"`` element for one whose
        ``<strong>`` caption also contains a keyword, and return the raw
        result of ``index_to_soup`` for that link.

        Returns None when the page has no body, content element or usable
        title, when the title does not match, or when no anchor qualifies.
        """
        body = soup.body
        content = None if body is None else body.find(attrs={'class': 'content'})
        title_tag = soup.title
        # Pages missing any of these pieces cannot be teaser pages; bail out
        # instead of raising AttributeError on None.
        if content is None or title_tag is None or not title_tag.string:
            return None
        if not self._mentions_section(title_tag.string.lower()):
            return None

        for anchor in content.findAll(name='a'):
            caption = anchor.strong
            if not (caption and caption.string):
                continue
            if not self._mentions_section(caption.string.lower()):
                continue
            href = anchor.get('href')
            if not href:
                # An anchor without a target cannot be followed.
                continue
            return self.index_to_soup(self._absolute_url(href), raw=True)
        return None

    def preprocess_html(self, soup):
        """Rewrite every href that does not start with 'http' to be absolute.

        The soup is modified in place and returned.
        """
        for link in soup.findAll('a', href=True):
            link['href'] = self._absolute_url(link['href'])
        return soup