From 9a6073fcfb7110e714c1252034642ae51352e0b9 Mon Sep 17 00:00:00 2001 From: intromatyk Date: Wed, 24 Jul 2013 21:07:29 +0200 Subject: [PATCH 1/2] fixed antyweb recipe --- recipes/antyweb.recipe | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe index b7d3d2583c..d85ed4adcc 100644 --- a/recipes/antyweb.recipe +++ b/recipes/antyweb.recipe @@ -21,21 +21,9 @@ class AntywebRecipe(BasicNewsRecipe): simultaneous_downloads = 3 keep_only_tags =[] - keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'})) - - - remove_tags =[] - remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'})) - remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'})) - remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'})) - - - extra_css = ''' - body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} - ''' + keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'entry-title '})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-content'})) + extra_css = '''body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}''' feeds = [ (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'), From f555ab52792596815b73a044c19b8b51d42b4c6b Mon Sep 17 00:00:00 2001 From: intromatyk Date: Wed, 24 Jul 2013 22:32:54 +0200 Subject: [PATCH 2/2] update Dilbert recipe --- recipes/dilbert.recipe | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/recipes/dilbert.recipe b/recipes/dilbert.recipe index ed2771debf..d64f6e6882 100644 --- a/recipes/dilbert.recipe +++ b/recipes/dilbert.recipe @@ -16,7 +16,7 @@ class DilbertBig(BasicNewsRecipe): oldest_article = 15 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = True + use_embedded_content = False encoding = 'utf-8' publisher = 'UNITED FEATURE SYNDICATE, INC.' category = 'comic' @@ -30,25 +30,13 @@ class DilbertBig(BasicNewsRecipe): ,'publisher' : publisher } - feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip' )] - - def get_article_url(self, article): - return article.get('feedburner_origlink', None) + feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')] preprocess_regexps = [ (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE), lambda match: 'strip.zoom.gif') ] def preprocess_html(self, soup): - for tag in soup.findAll(name='a'): - if tag['href'].find('http://feedads') >= 0: - tag.extract() - return soup - - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - img {max-width:100%; min-width:100%;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - ''' + for tag in soup.findAll(name='input'): + image = BeautifulSoup('') + return image