diff --git a/recipes/adventure_zone_pl.recipe b/recipes/adventure_zone_pl.recipe index 2a6cf9957d..dd47af946a 100644 --- a/recipes/adventure_zone_pl.recipe +++ b/recipes/adventure_zone_pl.recipe @@ -21,35 +21,24 @@ class Adventure_zone(BasicNewsRecipe): extra_css = '.main-bg{text-align: left;} td.capmain{ font-size: 22px; }' feeds = [(u'Nowinki', u'http://www.adventure-zone.info/fusion/feeds/news.php')] - '''def parse_feeds (self): - feeds = BasicNewsRecipe.parse_feeds(self) - soup=self.index_to_soup(u'http://www.adventure-zone.info/fusion/feeds/news.php') - tag=soup.find(name='channel') - titles=[] - for r in tag.findAll(name='image'): - r.extract() - art=tag.findAll(name='item') - for i in art: - titles.append(i.title.string) - for feed in feeds: - for article in feed.articles[:]: - article.title=titles[feed.articles.index(article)] - return feeds''' - - '''def get_cover_url(self): soup = self.index_to_soup('http://www.adventure-zone.info/fusion/news.php') cover=soup.find(id='box_OstatninumerAZ') self.cover_url='http://www.adventure-zone.info/fusion/'+ cover.center.a.img['src'] return getattr(self, 'cover_url', self.cover_url)''' + def populate_article_metadata(self, article, soup, first): result = re.search('(.+) - Adventure Zone', soup.title.string) if result: - article.title = result.group(1) + result = result.group(1) else: result = soup.body.find('strong') if result: - article.title = result.string + result = result.string + if result: + result = result.replace('&', '&') + result = result.replace(''', '’') + article.title = result def skip_ad_pages(self, soup): skip_tag = soup.body.find(name='td', attrs={'class':'main-bg'}) diff --git a/recipes/benchmark_pl.recipe b/recipes/benchmark_pl.recipe index 95c5488a24..c934cc4ac4 100644 --- a/recipes/benchmark_pl.recipe +++ b/recipes/benchmark_pl.recipe @@ -1,5 +1,7 @@ from calibre.web.feeds.news import BasicNewsRecipe import re +from calibre.ebooks.BeautifulSoup import Comment + class BenchmarkPl(BasicNewsRecipe): title = u'Benchmark.pl' __author__ = 'fenuks' @@ -13,10 +15,10 @@ class BenchmarkPl(BasicNewsRecipe): no_stylesheets = True remove_attributes = ['style'] preprocess_regexps = [(re.compile(ur'