From ee72df324b4b49558dead053cd43117862d884ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20D=C5=82ugosz?= Date: Sun, 7 Apr 2013 23:14:08 +0200 Subject: [PATCH] fixes for fronda --- recipes/fronda.recipe | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/recipes/fronda.recipe b/recipes/fronda.recipe index 6ed5d052a3..5ae78ae848 100644 --- a/recipes/fronda.recipe +++ b/recipes/fronda.recipe @@ -23,6 +23,7 @@ class Fronda(BasicNewsRecipe): extra_css = ''' h1 {font-size:150%} .body {text-align:left;} + div#featured-image {font-style:italic; font-size:70%} ''' earliest_date = date.today() - timedelta(days=oldest_article) @@ -55,7 +56,10 @@ class Fronda(BasicNewsRecipe): articles = {} for url, genName in genres: - soup = self.index_to_soup('http://www.fronda.pl/c/'+ url) + try: + soup = self.index_to_soup('http://www.fronda.pl/c/'+ url) + except: + continue articles[genName] = [] for item in soup.findAll('li'): article_h = item.find('h2') @@ -77,16 +81,15 @@ class Fronda(BasicNewsRecipe): ] remove_tags = [ - dict(name='div', attrs={'class':['related-articles', - 'button right', - 'pagination']}), + dict(name='div', attrs={'class':['related-articles','button right','pagination','related-articles content']}), dict(name='h3', attrs={'class':'block-header article comments'}), - dict(name='ul', attrs={'class':'comment-list'}), - dict(name='ul', attrs={'class':'category'}), - dict(name='ul', attrs={'class':'tag-list'}), + dict(name='ul', attrs={'class':['comment-list','category','tag-list']}), dict(name='p', attrs={'id':'comments-disclaimer'}), dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}), dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}), - dict(name='div', attrs={'class':'related-articles content'}), - dict(name='div', attrs={'id':'comment-form'}) + dict(name='div', attrs={'id':'comment-form'}), + dict(name='span', attrs={'class':'separator'}) ] + + preprocess_regexps = [ + (re.compile(r'komentarzy: .*?', re.IGNORECASE | re.DOTALL | re.M ), lambda match: '')]