diff --git a/recipes/antyweb.recipe b/recipes/antyweb.recipe index b7d3d2583c..d85ed4adcc 100644 --- a/recipes/antyweb.recipe +++ b/recipes/antyweb.recipe @@ -21,21 +21,9 @@ class AntywebRecipe(BasicNewsRecipe): simultaneous_downloads = 3 keep_only_tags =[] - keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'})) - keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'})) - - - remove_tags =[] - remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'})) - remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'})) - remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'})) - remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'})) - - - extra_css = ''' - body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} - ''' + keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'entry-title '})) + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'news-content'})) + extra_css = '''body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}''' feeds = [ (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'), diff --git a/recipes/dilbert.recipe b/recipes/dilbert.recipe index ed2771debf..e98c9212a9 100644 --- a/recipes/dilbert.recipe +++ b/recipes/dilbert.recipe @@ -6,6 +6,7 @@ DrMerry added cover Image 2011-11-12 ''' from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup import re class DilbertBig(BasicNewsRecipe): @@ -16,7 +17,7 @@ class DilbertBig(BasicNewsRecipe): oldest_article = 15 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = True + use_embedded_content = False encoding = 'utf-8' publisher = 'UNITED FEATURE SYNDICATE, INC.' category = 'comic' @@ -30,25 +31,14 @@ class DilbertBig(BasicNewsRecipe): ,'publisher' : publisher } - feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip' )] - - def get_article_url(self, article): - return article.get('feedburner_origlink', None) + feeds = [(u'Dilbert', u'http://feed.dilbert.com/dilbert/daily_strip')] preprocess_regexps = [ (re.compile('strip\..*\.gif', re.DOTALL|re.IGNORECASE), lambda match: 'strip.zoom.gif') ] def preprocess_html(self, soup): - for tag in soup.findAll(name='a'): - if tag['href'].find('http://feedads') >= 0: - tag.extract() - return soup + for tag in soup.findAll(name='input'): + image = BeautifulSoup('') + return image - extra_css = ''' - h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} - h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} - img {max-width:100%; min-width:100%;} - p{font-family:Arial,Helvetica,sans-serif;font-size:small;} - body{font-family:Helvetica,Arial,sans-serif;font-size:small;} - '''