from calibre.web.feeds.news import BasicNewsRecipe


class Adventure_zone(BasicNewsRecipe):
    """Calibre news recipe for the Adventure Zone site (adventure-zone.info)."""

    title = u'Adventure Zone'
    __author__ = 'fenuks'
    description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'  # noqa
    category = 'games'
    language = 'pl'
    # Prefix prepended to hrefs that do not start with 'http'.
    BASEURL = 'http://www.adventure-zone.info/fusion/'
    no_stylesheets = True
    extra_css = '.image {float: left; margin-right: 5px;}'
    oldest_article = 20
    max_articles_per_feed = 100
    cover_url = 'http://www.adventure-zone.info/inne/logoaz_2012.png'
    remove_attributes = ['style']
    use_embedded_content = False
    keep_only_tags = [dict(attrs={'class': 'content'})]
    remove_tags = [dict(attrs={'class': 'footer'})]
    feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/rss/index.php')]

    # Substrings that mark a title or link text as pointing at a full article.
    # This tuple is the single source of truth for _is_linked_text(); the
    # 'recenz' stem is the one the matching code has always used.
    _trigger_words = ('zapowied', 'recenz', 'solucj', 'poradnik')

    @staticmethod
    def _is_linked_text(title):
        """Return True if *title* contains any of the trigger words.

        *title* may be ``str`` or ``bytes``; bytes are decoded as UTF-8 first,
        because mixing a ``str`` needle with a ``bytes`` haystack raises
        ``TypeError`` on Python 3. Empty or ``None`` input yields ``False``.
        The caller is expected to pass already lower-cased text.
        """
        if not title:
            return False
        if isinstance(title, bytes):
            title = title.decode('utf-8', 'replace')
        return any(word in title for word in Adventure_zone._trigger_words)

    def skip_ad_pages(self, soup):
        """Follow a teaser page to the article it links to, when applicable.

        If the page title contains a trigger word, the links inside the first
        element with class ``subject`` are scanned; for the first link whose
        text also contains a trigger word, the linked page is fetched with
        ``index_to_soup(..., raw=True)`` and returned. In every other case
        (including a page with no title, body or ``subject`` element) ``None``
        is returned.
        """
        if soup.title is None or soup.body is None:
            return None
        if not self._is_linked_text(soup.title.renderContents().lower()):
            return None

        subject = soup.body.find(attrs={'class': 'subject'})
        if subject is None:
            return None

        for anchor in subject.findAll(name='a', href=True):
            text = anchor.renderContents()
            if text and self._is_linked_text(text.lower()):
                return self.index_to_soup(self.BASEURL + anchor['href'], raw=True)
        return None

    def preprocess_html(self, soup):
        """Prefix every href that does not start with 'http' with BASEURL."""
        for link in soup.findAll('a', href=True):
            if not link['href'].startswith('http'):
                link['href'] = self.BASEURL + link['href']
        return soup