Fixed various recipes that had broken/dead feed links.

2025-08-11 09:13:57 -04:00 · 2016-10-09 10:50:14 +02:00 · 2016-10-09 10:50:14 +02:00 · 445955a537
commit 445955a537
parent d2eb1426b0
13 changed files with 117 additions and 383 deletions
--- a/recipes/baltimore_sun.recipe
+++ b/recipes/baltimore_sun.recipe
@ -24,65 +24,39 @@ class BaltimoreSun(BasicNewsRecipe):
    no_stylesheets = True
    remove_javascript = True
    remove_empty_feeds = True
+    auto_cleanup = False

    ignore_duplicate_articles = {'title'}
    keep_only_tags = [
-        dict(name=['div', 'section'], attrs={'class': [
-             "trb_article_title", "trb_article_leadart", 'trb_bylines', 'trb_article_dateline', 'trb_mainContent']}),
+        dict(name=['div'], attrs={'class': ['trb_ar_hl', 'trb_ar_hl_t',
+                                            'trb_ar_la', 'trb_ar_by',
+                                            'trb_ar_dateline', 'trb_ar_page']}),
    ]
    remove_tags = [
        dict(name=['meta', 'link']),
-        dict(name=['div', 'aside'], attrs={'class': lambda x: x and set(x.split()).intersection({
-            'trb_gptAd', 'trb_panelmod_container', 'trb_socialize', 'trb_taboola', 'trb_embed_related'})}),
    ]

-    def preprocess_html(self, soup):
-        for img in soup.findAll('img'):
-            img['src'] = img['data-baseurl']
-        return soup
-
    feeds = [
        # News ##
        (u'Top Headlines', u'http://feeds.feedburner.com/baltimoresun/news/rss2'),
        (u'Breaking News', u'http://feeds.feedburner.com/baltimoresun/news/local/annearundel/rss2'),
        (u'Top Maryland', u'http://feeds.feedburner.com/baltimoresun/news/local/rss2'),
-        # (u'Anne Arundel County', u'http://feeds.feedburner.com/baltimoresun/news/local/annearundel/rss2'),
        (u'Baltimore City', u'http://feeds.feedburner.com/baltimoresun/news/local/baltimore_city/rss20xml'),
-        # (u'Baltimore County', u'http://feeds.feedburner.com/baltimoresun/news/local/baltimore_county/rss2'),
-        # (u'Carroll County', u'http://feeds.feedburner.com/baltimoresun/news/local/carroll/rss2'),
-        # (u'Harford County', u'http://feeds.feedburner.com/baltimoresun/news/local/harford/rss2),
-        # (u'Howard County', u'http://feeds.feedburner.com/baltimoresun/news/local/howard/rss2'),
        (u'Education', u'http://feeds.feedburner.com/baltimoresun/news/education/rss2'),
-        # (u'Obituaries', u'http://feeds.feedburner.com/baltimoresun/news/obituaries/rss2'),
-        (u'Local Politics',
-         u'http://feeds.feedburner.com/baltimoresun/news/local/politics/rss2'),
+        (u'Local Politics', u'http://feeds.feedburner.com/baltimoresun/news/local/politics/rss2'),
        (u'Weather', u'http://feeds.feedburner.com/baltimoresun/news/weather/site/rss2'),
-        # (u'Traffic', u'http://feeds.feedburner.com/baltimoresun/news/traffic/rss2'),
        (u'Nation/world', u'http://feeds.feedburner.com/baltimoresun/news/nationworld/rss2'),
-        # (u'Weird News', u'http://feeds.feedburner.com/baltsun-weirdnews'),

        # Sports##
        (u'Top Sports', u'http://feeds.feedburner.com/baltimoresun/sports/rss2'),
        (u'Orioles/Baseball', u'http://www.baltimoresun.com/sports/orioles/rss2.0.xml'),
-        (u'Ravens/Football',
-         u'http://feeds.feedburner.com/baltimoresun/sports/football/rss2'),
-        # (u'Terps', u''http://feeds.feedburner.com/baltimoresun/sports/terps/rss2'),
-        # (u'College Football', u''feed://feeds.feedburner.com/baltimoresun/sports/college/football/rss2'),
-        # (u'Lacrosse', u'http://feeds.feedburner.com/baltimoresun/sports/college/lacrosse/rss2'),
-        # (u'Horse Racing', u'http://feeds.feedburner.com/baltimoresun/sports/horseracing/rss2'),
-        # (u'Golf', u'http://feeds.feedburner.com/baltimoresun/sports/golf/rss2'),
-        # (u'NBA', u'http://feeds.feedburner.com/baltimoresun/sports/basketball/rss2'),
-        # (u'High School', u'http://feeds.feedburner.com/baltimoresun/sports/highschool/rss2'),
-        # (u'Outdoors', u'http://feeds.feedburner.com/baltimoresun/sports/outdoors/rss2'),
+        (u'Ravens/Football', u'http://feeds.feedburner.com/baltimoresun/sports/football/rss2'),

        # Entertainment ##
-        (u'Celebrity News', u'http://baltimore.feedsportal.com/c/34255/f/623042/index.rss'),
        (u'Arts & Theater', u'http://feeds.feedburner.com/baltimoresun/entertainment/galleriesmuseums/rss2'),
        (u'Movies', u'http://www.baltimoresun.com/entertainment/movies/rss2.0.xml'),
-        (u'Music & Nightlife',
-         u'http://www.baltimoresun.com/entertainment/music/rss2.0.xml'),
-        (u'Restaurants & Food',
-         u'http://www.baltimoresun.com/entertainment/dining/rss2.0.xml'),
+        (u'Music & Nightlife', u'http://www.baltimoresun.com/entertainment/music/rss2.0.xml'),
+        (u'Restaurants & Food', u'http://www.baltimoresun.com/entertainment/dining/rss2.0.xml'),
        (u'TV/Media', u'http://www.baltimoresun.com/entertainment/tv/rss2.0.xml'),

        # Life ##
@ -91,104 +65,42 @@ class BaltimoreSun(BasicNewsRecipe):
        (u'Living Green', u'http://www.baltimoresun.com/features/green/rss2.0.xml'),
        (u'Fashion', u'http://www.baltimoresun.com/features/fashion/rss2.0.xml'),
        (u'Travel', u'http://www.baltimoresun.com/travel/rss2.0.xml'),
-        # (u'Faith', u'http://www.baltimoresun.com/features/faith/rss2.0.xml'),

        # Business ##
        (u'Top Business', u'http://www.baltimoresun.com/business/rss2.0.xml'),
        (u'Technology', u'http://www.baltimoresun.com/business/technology/rss2.0.xml'),
-        (u'Personal finance', u'http://baltimore.feedsportal.com/c/34255/f/623057/index.rss'),
        (u'Real Estate', u'http://www.baltimoresun.com/classified/realestate/rss2.0.xml'),
-        (u'Jobs', u'http://baltimore.feedsportal.com/c/34255/f/623059/index.rss'),
-        # (u'DIY', u'http://baltimore.feedsportal.com/c/34255/f/623060/index.rss'),
-        # (u'Consumer Safety', u'http://baltimore.feedsportal.com/c/34255/f/623061/index.rss'),
        (u'Investing', u'http://www.baltimoresun.com/business/money/rss2.0.xml'),

        # Opinion##
        (u'Sun Editorials', u'http://www.baltimoresun.com/news/opinion/editorial/rss2.0.xml'),
        (u'Op/Ed', u'http://www.baltimoresun.com/news/opinion/oped/rss2.0.xml'),
-        (u'Readers Respond', u'http://baltimore.feedsportal.com/c/34255/f/623065/index.rss'),
-
-        # Columnists ##
-        (u'Kevin Cowherd', u'http://www.baltimoresun.com/sports/bal-columnist-cowherd,0,6829726.columnist-rss2.0.xml'),
-        (u'Robert Ehrlich', u'http://www.baltimoresun.com/news/opinion/columnists/bal-columnist-ehrlich,0,1825227.columnist-rss2.0.xml'),
-        (u'Jacques Kelly', u'http://www.baltimoresun.com/news/maryland/bal-columnist-kelly,0,1154701.columnist-rss2.0.xml'),
-        (u'Marta H. Mossburg', u'http://www.baltimoresun.com/news/opinion/oped/bal-columnist-mossburg,0,7982155.columnist-rss2.0.xml'),
-        (u'Mike Preston', u'http://www.baltimoresun.com/sports/bal-columnist-preston,0,6169796.columnist-rss2.0.xml'),
-        (u'Susan Reimer', u'http://www.baltimoresun.com/news/opinion/bal-columnist-reimer,0,162466.columnist-rss2.0.xml'),
-        (u'Dan Rodricks', u'http://www.baltimoresun.com/news/maryland/bal-columnist-rodricks,0,7089843.columnist-rss2.0.xml'),
-        (u'Thomas F. Schaller', u'http://www.baltimoresun.com/news/opinion/columnists/bal-columnist-schaller,0,897397.columnist-rss2.0.xml'),
-        (u'Peter Schmuck', u'http://www.baltimoresun.com/sports/bal-columnist-schmuck,0,7485088.columnist-rss2.0.xml'),

        # News Blogs ##
-        (u'Baltimore Crime Beat',
-         u'http://baltimore.feedsportal.com/c/34255/f/623075/index.rss'),
        (u'InsideEd', u'http://www.baltimoresun.com/news/maryland/education/blog/rss2.0.xml'),
-        (u'Maryland Politics',
-         u'http://www.baltimoresun.com/news/maryland/politics/blog/rss2.0.xml'),
-        (u'Maryland Weather',
-         u'http://www.baltimoresun.com/news/weather/weather-blog/rss2.0.xml'),
-        (u'Second Opinion',
-         u'http://www.baltimoresun.com/news/opinion/second-opinion-blog/rss2.0.xml'),
-        (u'Sun Investigates',
-         u'http://www.baltimoresun.com/news/maryland/sun-investigates/rss2.0.xml'),
+        (u'Maryland Politics', u'http://www.baltimoresun.com/news/maryland/politics/blog/rss2.0.xml'),
+        (u'Maryland Weather', u'http://www.baltimoresun.com/news/weather/weather-blog/rss2.0.xml'),
+        (u'Second Opinion', u'http://www.baltimoresun.com/news/opinion/second-opinion-blog/rss2.0.xml'),
+        (u'Sun Investigates', u'http://www.baltimoresun.com/news/maryland/sun-investigates/rss2.0.xml'),
        (u'You Dont Say', u'http://www.baltimoresun.com/news/language-blog/rss2.0.xml'),

        # Business Blogs ##
        (u'BaltTech', u'http://www.baltimoresun.com/business/technology/blog/rss2.0.xml'),
-        (u'Consuming Interests',
-         u'http://www.baltimoresun.com/business/consuming-interests-blog/rss2.0.xml'),
-        (u'The Real Estate Wonk',
-         u'http://www.baltimoresun.com/business/real-estate/wonk/rss2.0.xml'),
+        (u'Consuming Interests', u'http://www.baltimoresun.com/business/consuming-interests-blog/rss2.0.xml'),
+        (u'The Real Estate Wonk', u'http://www.baltimoresun.com/business/real-estate/wonk/rss2.0.xml'),

        # Entertainment Blogs ##
        (u'ArtSmash', 'http://www.baltimoresun.com/entertainment/arts/artsmash/rss2.0.xml'),
-        (u'Baltimore Diner', u'http://baltimore.feedsportal.com/c/34255/f/623088/index.rss'),
        (u'Midnight Sun', u'http://www.baltimoresun.com/entertainment/music/midnight-sun-blog/rss2.0.xml'),
        (u'Read Street', u'http://www.baltimoresun.com/features/books/read-street/rss2.0.xml'),
        (u'Z on TV', u'http://www.baltimoresun.com/entertainment/tv/z-on-tv-blog/rss2.0.xml'),

        # Life Blogs ##
-        # (u'BMore Green', u'http://weblogs.baltimoresun.com/features/green/index.xml'),
-        (u'Baltimore Insider',
-         u'http://www.baltimoresun.com/features/baltimore-insider-blog/rss2.0.xml'),
+        (u'Baltimore Insider', u'http://www.baltimoresun.com/features/baltimore-insider-blog/rss2.0.xml'),
        (u'Picture of Health', u'http://www.baltimoresun.com/health/blog/rss2.0.xml'),
-        # (u'Unleashed', u'http://weblogs.baltimoresun.com/features/mutts/blog/index.xml'),
-
-        # b the site blogs ##
-        (u'TV Lust', u'http://baltimore.feedsportal.com/c/34255/f/623096/index.rss'),

        # Sports Blogs ##
-        (u'Baltimore Sports Blitz',
-         u'http://baltimore.feedsportal.com/c/34255/f/623097/index.rss'),
-        # (u'Lacrosse Insider',u'http://www.baltimoresun.com/sports/lacrosse-blog/rss2.0.xml'),
        (u'Orioles Insider', u'http://baltimore.feedsportal.com/c/34255/f/623100/index.rss'),
-        (u'Ravens Insider',
-         u'http://www.baltimoresun.com/sports/ravens/ravens-insider/rss2.0.xml'),
-        # (u'Ring Posts', u'http://weblogs.baltimoresun.com/sports/wrestling/blog/index.xml'),
-        (u'The Schmuck Stops Here',
-         u'http://www.baltimoresun.com/sports/schmuck-blog/rss2.0.xml'),
-        # (u'Tracking the Terps', u'http://weblogs.baltimoresun.com/sports/college/maryland_terps/blog/index.xml'),
-        # (u'Varsity Letters', u'http://weblogs.baltimoresun.com/sports/highschool/varsityletters/index.xml'),
+        (u'Ravens Insider', u'http://www.baltimoresun.com/sports/ravens/ravens-insider/rss2.0.xml'),
+        (u'The Schmuck Stops Here', u'http://www.baltimoresun.com/sports/schmuck-blog/rss2.0.xml'),
    ]
-
-    def get_article_url(self, article):
-        ans = None
-        try:
-            s = article.summary
-            ans = urllib.unquote(
-                re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
-        except:
-            pass
-        if ans is None:
-            ans = article.get('feedburner_origlink',
-                              article.get('guid', article.get('link')))
-        if ans is not None:
-            return ans.replace('?track=rss', '')
-
-    def skip_ad_pages(self, soup):
-        text = soup.find(text='click here to continue to article')
-        if text:
-            a = text.parent
-            url = a.get('href')
-            if url:
-                return self.index_to_soup(url, raw=True)
--- a/recipes/dallas.recipe
+++ b/recipes/dallas.recipe
@ -12,20 +12,24 @@ class DallasNews(BasicNewsRecipe):
    auto_cleanup = True

    feeds = [
+        ('News',
+         'http://www.dallasnews.com/news.rss'),
        ('Local News',
-         'http://www.dallasnews.com/news/politics/local-politics/?rss'),
-        ('National Politics',
-         'http://www.dallasnews.com/news/politics/national-politic/?rss'),
+         'http://www.dallasnews.com/news/local-politics.rss'),
        ('State Politics',
-         'http://www.dallasnews.com/news/politics/state-politics/?rss'),
+         'http://www.dallasnews.com/news/texas-politics.rss'),
        ('Religion',
-         'http://www.dallasnews.com/news/religion/?rss'),
+         'http://www.dallasnews.com/life/faith.rss'),
        ('Crime',
-         'http://www.dallasnews.com/news/crime/headlines/?rss'),
+         'http://www.dallasnews.com/news/crime.rss'),
        ('Celebrity News',
         'http://www.dallasnews.com/entertainment/celebrity-news/?rss&listname=TopStories'),
-        ('Nation',
-         'http://www.dallasnews.com/news/nation-world/nation/?rss'),
-        ('World',
-         'http://www.dallasnews.com/news/nation-world/world/?rss'),
+        ('Business',
+         'http://www.dallasnews.com/business.rss'),
+        ('Arts',
+         'http://www.dallasnews.com/arts.rss'),
+        ('Life',
+         'http://www.dallasnews.com/life.rss'),
+        ('Opinion',
+         'http://www.dallasnews.com/opinion.rss'),
    ]
--- a/recipes/digital_arts.recipe
+++ b/recipes/digital_arts.recipe
@ -18,12 +18,15 @@ articles_are_obfuscated = True

 class digiArts(BasicNewsRecipe):
    __author__ = 'Lorenzo Vigentini'
-    description = 'Digital Arts - comprehensive coverage of the art of graphic design, 3D, animation, video, effects, web and interactive design, in print and online.'  # noqa
+    description = ('Digital Arts - comprehensive coverage of the art of '
+                   'graphic design, 3D, animation, video, effects, web and '
+                   'interactive design, in print and online.')  # noqa
    cover_url = 'http://media.digitalartsonline.co.uk/graphics/logo_digital_arts.gif'

    title = 'Digital Arts Magazine  '
    publisher = 'IDG Communication'
-    category = 'Multimedia, photo, video, computing, product reviews, editing, cameras, production'
+    category = ('Multimedia, photo, video, computing, product reviews, '
+                'editing, cameras, production')

    language = 'en'
    encoding = 'cp1252'
@ -36,30 +39,22 @@ class digiArts(BasicNewsRecipe):

    remove_javascript = True
    no_stylesheets = True
-
-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url + '&print')
-
-        response = br.follow_link(url, nr=0)
-        html = response.read()
-
-        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-        return self.temp_files[-1].name
+    auto_cleanup = False

    keep_only_tags = [
-        dict(name='div', attrs={'id': ['articleHeader', 'articleContent']})
+        dict(name='h1', attrs={'itemprop': 'headline'}),
+        dict(name='span', attrs={'itemprop': 'author'}),
+        dict(name='section', attrs={'class': 'articleBody'}),
    ]

-    remove_tags = [
-        dict(name='div', attrs={'class': ['submissionBar', 'mpuContainer']}),
-        dict(name='div', attrs={'id': ['articleSidebar', 'articleFooter']})
-    ]
-    remove_tags_after = [
-        dict(name='p', attrs={'id': 'articlePageList'})
-    ]
+    # Feeds are found here: http://www.digitalartsonline.co.uk/rss/
    feeds = [
-        (u'Content', u'http://rss.feedsportal.com/c/662/f/8410/index.rss')
+        ('Latest News Articles',
+         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-news.xml'),
+        ('Latest Tutorials',
+         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-tutorials.xml'),
+        ('Latest Reviews',
+         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-reviews.xml'),
+        ('Latest Features',
+         'http://www.digitalartsonline.co.uk/rss/feeds/digitalarts-features.xml'),
    ]
--- a/recipes/discover_magazine.recipe
+++ b/recipes/discover_magazine.recipe
@ -112,23 +112,13 @@ class DiscoverMagazine(BasicNewsRecipe):
        return soup

    feeds = [
-        (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
-        (u'Health - Medicine',
-         u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
-        (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
-        (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
-        (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
-        (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
-        (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
-        (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
+        (u'Technologiy', u'http://feeds.feedburner.com/DiscoverTechnology'),
+        (u'Health & Medicine', u'http://feeds.feedburner.com/DiscoverHealthMedicine'),
+        (u'Mind Brain', u'http://feeds.feedburner.com/DiscoverMindBrain'),
+        (u'Space & Physics', u'http://feeds.feedburner.com/DiscoverSpace'),
+        (u'Living World', u'http://feeds.feedburner.com/DiscoverLivingWorld'),
+        (u'Environment', u'http://feeds.feedburner.com/DiscoverEnvironment'),
        (u"20 Things you didn't know about...",
-         u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
-        (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
-        (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
-        (u'What is This', u'http://discovermagazine.com/columns/what-is-this/rss.xml'),
-        (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
-        (u'Think Tech', u'http://discovermagazine.com/columns/think-tech/rss.xml'),
-        (u'Future Tech', u'http://discovermagazine.com/columns/future-tech/rss.xml'),
-        (u'Discover Interview',
-         u'http://discovermagazine.com/columns/discover-interview/rss.xml'),
+         u'http://feeds.feedburner.com/20ThingsYouDidntKnowAbout'),
+        (u'Vital Signs', u'http://feeds.feedburner.com/discovermagazine/VitalSigns'),
    ]
--- a/recipes/editor_and_publisher.recipe
+++ b/recipes/editor_and_publisher.recipe
@ -18,9 +18,12 @@ class EandP(BasicNewsRecipe):
    encoding = 'utf8'
    cover_url = 'http://www.editorandpublisher.com/images/EP_main_logo.gif'
    remove_javascript = True
+    auto_cleanup = True

    html2lrf_options = [
-        '--comment', description, '--category', category, '--publisher', publisher
+        '--comment', description,
+        '--category', category,
+        '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + \
@ -34,21 +37,11 @@ class EandP(BasicNewsRecipe):
                 h2{font-size: large;}
                '''

-    # Keep only div:itemmgap
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class': 'itemmgap'})
-    ]
-
    # Remove commenting/social media lins

    remove_tags_after = [dict(name='div', attrs={'class': 'clear'})]

-    feeds = [(u'Breaking News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx'),
-             (u'Business News',
-              u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=2'),
-             (u'Ad/Circ News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=3'),
-             (u'Newsroom', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=4'),
-             (u'Technology News',
-              u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=5'),
-             (u'Syndicates News', u'http://www.editorandpublisher.com/GenerateRssFeed.aspx?CategoryId=7')]
+    feeds = [
+        (u'Editor & Publisher', u'http://www.editorandpublisher.com/feed/'),
+        (u'Comments', u'http://www.editorandpublisher.com/comments/feed/'),
+    ]
--- a/recipes/everett_herald.recipe
+++ b/recipes/everett_herald.recipe
@ -5,32 +5,9 @@ class AdvancedUserRecipe1295088390(BasicNewsRecipe):
    title = u'Everett Herald'
    language = 'en'
    __author__ = '77ja65'
-    oldest_article = 4
+    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
-    masthead_url = 'http://heraldnet.com/images/hnet/jQueryComponents/jQueryNavigation/heraldnet_logo.png'
-    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'
+    auto_cleanup = True

-    feeds = [(u'Local News',
-              u'http://heraldnet.com/section/RSS02&mime=xml'),
-             (u'Sports', u'http://heraldnet.com/section/RSS04&mime=xml'),
-             (u'Entertainment',
-              u'http://heraldnet.com/section/RSS07&mime=xml'),
-             (u'Life', u'http://heraldnet.com/section/RSS03&mime=xml'),
-             (u'Breaking News',
-              u'http://heraldnet.com/section/RSS34&mime=xml'),
-             (u'Seahawks', u'http://heraldnet.com/section/RSS22&mime=xml'),
-             (u'HeraldNet', u'http://heraldnet.com/section/RSS01&mime=xml'),
-             (u'Inside Everett',
-              u'http://heraldnet.com/section/RSS26&mime=xml')
-             ]
-
-    def print_version(self, url):
-        return url + "&template=PrinterFriendly"
-
-    extra_css = '''
-                     h1{font-family:Arial,Helvetica,sans-serif; font-
- weight:bold;font-size:large;}
-                     h2{font-family:Arial,Helvetica,sans-serif; font-
- weight:normal;font-size:small;}
-                 '''
+    feeds = [(u'Local News', u'http://www.heraldnet.com/feed/')]
--- a/recipes/fairbanks_daily.recipe
+++ b/recipes/fairbanks_daily.recipe
@ -6,101 +6,29 @@ class FairbanksDailyNewsminer(BasicNewsRecipe):
    __author__ = 'Roger'
    oldest_article = 7
    max_articles_per_feed = 100
-
    description = 'The voice of interior Alaska since 1903'
    publisher = 'http://www.newsminer.com/'
    category = 'news, Alaska, Fairbanks'
    language = 'en'
-
-    # Make article titles, author and date bold, italic or small font.
-    # http://assets.matchbin.com/sites/635/stylesheets/newsminer.com.css
-    # (signature_line contains date, views, comments)
-    extra_css = '''
-                    .story_item_headline { font-size: medium; font-weight: bold; }
-                    .story_item_author { font-size: small; font-style:italic; }
-                    .signature_line { font-size: small; }
-                '''
-
    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    language = 'en'
    encoding = 'utf8'
    conversion_options = {'linearize_tables': True}
+    auto_cleanup = True

-    # TODO: The News-miner cover image seems a bit small.  Can this be
-    # enlarged by 10-30%?
-    masthead_url = 'http://d2uh5w9wm14i0w.cloudfront.net/sites/635/assets/top_masthead_-_menu_pic.jpg'
-
-    # In order to omit seeing number of views, number of posts and the pipe
-    # symbol for divider after the title and date of the article, a regex or
-    # manual processing is needed to get just the "story_item_date updated"
-    # (which contains the date).  Everything else on this line is pretty much not needed.
-    #
-    # Currently, you will see  the following:
-    # | Aug 24, 2011 | 654 views | 6 | |
-    # (ie. 6 comments)
-    #
-
-    # The following was suggested, but it looks like I also need to define self & soup
-    # (as well as bring in extra soup depends?)
-    # date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
-
-    # preprocess_regexps = [(re.compile(r'<span[^>]*addthis_separator*>'), lambda match: '') ]
-    # preprocess_regexps = [(re.compile(r'span class="addthis_separator">|</span>'), lambda match: '') ]
-
-    # preprocess_regexps = [
-    #           (re.compile(r'<start>.*?<end>', re.IGNORECASE | re.DOTALL), lambda match : ''),
-    #               ]
-
-    # def get_browser(self):
-    # def preprocess_html(soup, first_fetch):
-    #    date = self.tag_to_string(soup.find('span', attrs={'class':'story_item_date updated'}))
-    #    return
-
-    # preprocess_regexps = [(re.compile(r'&nbsp;|.*?', re.DOTALL), lambda m: '')]
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class': 'story_item_headline entry-title'}),
-        dict(name='div', attrs={'class': 'full_story'})
-    ]
-
-    remove_tags = [
-        # Try getting rid of some signature_line (date line) stuff
-        dict(name='img', attrs={'class': 'dont_touch_me'}),
-        dict(name='span', attrs={
-            'class': 'number_recommendations'}),
-
-        # Removes div within <!-- AddThis Button BEGIN --> <!--
-        # AddThis Button END -->
-        dict(name='div', attrs={
-            'class': 'addthis_toolbox addthis_default_style'}),
-
-        dict(name='div', attrs={'class': 'related_content'}),
-        dict(name='div', attrs={'id': 'comments_container'})
-    ]
-
-    # Comment-out or uncomment any of the following RSS feeds according to your
-    # liking.
-    #
-    # TODO: Some random bits of text might be trailing the last page (or TOC on
-    # MOBI files), these are bits of public posts and comments and need to also
-    # be removed.
-    #
    feeds = [
-        (u'Alaska News', u'http://newsminer.com/rss/rss_feeds/alaska_news?content_type=article&tags=alaska_news&page_name=rss_feeds&instance=alaska_news'),
-        (u'Local News', u'http://newsminer.com/rss/rss_feeds/local_news?content_type=article&tags=local_news&page_name=rss_feeds&offset=0&instance=local_news'),
-        (u'Business', u'http://newsminer.com/rss/rss_feeds/business_news?content_type=article&tags=business_news&page_name=rss_feeds&instance=business_news'),
-        (u'Politics', u'http://newsminer.com/rss/rss_feeds/politics_news?content_type=article&tags=politics_news&page_name=rss_feeds&instance=politics_news'),
-        (u'Sports', u'http://newsminer.com/rss/rss_feeds/sports_news?content_type=article&tags=sports_news&page_name=rss_feeds&instance=sports_news'),
-        (u'Latitude 65 feed', u'http://newsminer.com/rss/rss_feeds/latitude_65?content_type=article&tags=latitude_65&page_name=rss_feeds&offset=0&instance=latitude_65'),  # noqa
-        # (u'Sundays', u'http://newsminer.com/rss/rss_feeds/Sundays?content_type=article&tags=alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Sundays'),  # noqa
-        (u'Outdoors', u'http://newsminer.com/rss/rss_feeds/Outdoors?content_type=article&tags=outdoors&page_name=rss_feeds&instance=Outdoors'),
-        # (u'Fairbanks Grizzlies', u'http://newsminer.com/rss/rss_feeds/fairbanks_grizzlies?content_type=article&tags=fairbanks_grizzlies&page_name=rss_feeds&instance=fairbanks_grizzlies'),  # noqa
-        # (u'Newsminer', u'http://newsminer.com/rss/rss_feeds/Newsminer?content_type=article&tags=ted_stevens_bullets+ted_stevens+sports_news+business_news+fairbanks_grizzlies+dermot_cole_column+outdoors+alaska_science_forum+scott_mccrea+interior_gardening+in_the_bush+judy_ferguson+book_reviews+theresa_bakker+judith_kleinfeld+interior_scrapbook+nuggets_comics+freeze_frame&page_name=rss_feeds&tag_inclusion=or&instance=Newsminer'),  # noqa
-        (u'Opinion', u'http://newsminer.com/rss/rss_feeds/Opinion?content_type=article&tags=editorials&page_name=rss_feeds&instance=Opinion'),
-        (u'Youth', u'http://newsminer.com/rss/rss_feeds/Youth?content_type=article&tags=youth&page_name=rss_feeds&instance=Youth'),
-        # (u'Dermot Cole Blog', u'http://newsminer.com/rss/rss_feeds/dermot_cole_blog+rss?content_type=blog+entry&sort_by=posted_on&user_ids=3015275&page_name=blogs_dermot_cole&limit=10&instance=dermot_cole_blog+rss'),  # noqa
-        (u'Dermot Cole Column', u'http://newsminer.com/rss/rss_feeds/Dermot_Cole_column?content_type=article&tags=dermot_cole_column&page_name=rss_feeds&instance=Dermot_Cole_column'),  # noqa
-        # (u'Sarah Palin', u'http://newsminer.com/rss/rss_feeds/sarah_palin?content_type=article&tags=palin_in_the_news+palin_on_the_issues&page_name=rss_feeds&tag_inclusion=or&instance=sarah_palin')  # noqa
+        (u'Alaska News',
+         u'http://www.newsminer.com/search/?f=rss&t=article&c=news/alaska_news&l=50&s=start_time&sd=desc'),
+        (u'Local News',
+         u'http://www.newsminer.com/search/?f=rss&t=article&c=news/local_news&l=50&s=start_time&sd=desc'),
+        (u'Business',
+         u'http://www.newsminer.com/search/?f=rss&t=article&c=business&l=50&s=start_time&sd=desc'),
+        (u'Politics',
+         u'http://www.newsminer.com/search/?f=rss&t=article&c=news/politics&l=50&s=start_time&sd=desc'),
+        (u'Sports',
+         u'http://www.newsminer.com/search/?f=rss&t=article&c=sports&l=50&s=start_time&sd=desc'),
+        (u'Opinion',
+         u'http://www.newsminer.com/search/?f=rss&t=article&c=opinion&l=50&s=start_time&sd=desc'),
    ]
--- a/recipes/fan_graphs.recipe
+++ b/recipes/fan_graphs.recipe
@ -20,6 +20,7 @@ class FanGraphs(BasicNewsRecipe):
    category = 'Baseball'
    language = 'en'
    publication_type = 'Blog'
+    auto_cleanup = True

    description = 'Baseball statistical analysis, graphs, and projections.'
    __author__ = 'David Appelman'
@ -27,9 +28,8 @@ class FanGraphs(BasicNewsRecipe):

    feeds = [
        (u'Fangraphs', u'http://feeds.feedburner.com/FanGraphs?format=xml'),
-        (u'Rotographs', u'http://www.wizardrss.com/feed/feeds.feedburner.com/RotoGraphs?format=xml'),
-        (u'Community', u'http://www.wizardrss.com/feed/www.fangraphs.com/community/?feed=rss2'),
-        (u'NotGraphs', u'http://www.wizardrss.com/feed/www.fangraphs.com/not/?feed=rss2')]
+        (u'Rotographs', u'http://feeds.feedburner.com/RotoGraphs?format=xml'),
+        (u'NotGraphs', u'http://feeds.feedburner.com/NotGraphs?format=xml')]

    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
--- a/recipes/glamour.recipe
+++ b/recipes/glamour.recipe
@ -10,37 +10,27 @@ class AdvancedUserRecipe1305547242(BasicNewsRecipe):
    language = 'en'
    remove_javascript = True
    __author__ = 'Anonymous'
-    remove_tags = [
-        dict(name='div', attrs={'class': 'articles_footer', 'class': 'printoptions'})]
+    auto_cleanup = True

-    def print_version(self, url):
-        return url + '?printable=true'
-
-    def preprocess_html(self, soup):
-        for alink in soup.findAll('a'):
-            if alink.string is not None:
-                tstr = alink.string
-                alink.replaceWith(tstr)
-        return soup
-
-    feeds = [	(u'All Fashion', u'http://feeds.glamour.com/glamour/all_fashion'),
-              (u'All Beauty', u'http://feeds.glamour.com/glamour/all_beauty'),
+    feeds = [
+        (u'All Fashion',
+         u'http://feeds.glamour.com/glamour/all_fashion'),
+        (u'All Beauty',
+         u'http://feeds.glamour.com/glamour/all_beauty'),
        (u'All Sex, Love & Life',
         u'http://feeds.glamour.com/glamour/sex_love_life'),
        (u'All Health & Fitness',
         u'http://feeds.glamour.com/glamour/health_fitness'),
-              (u'Shopping', u'http://feeds.glamour.com/glamour/shopping'),
        (u'Slaves to Fashion blog',
         u'http://feeds.glamour.com/glamour/slavestofashion'),
        (u'The Girls in the Beauty Department',
         u'http://feeds.glamour.com/glamour/thegirlsinthebeautydepartment'),
-              (u'Smitten blog', u'http://feeds.glamour.com/glamour/smitten'),
-              (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
-              (u'Single-ish blog', u'http://feeds.glamour.com/glamour/glamoursingle-ish'),
-              (u'Save the Date', u'http://feeds.feedburner.com/glamour/save-the-date'),
-              (u'Vitamin G blog', u'http://feeds.glamour.com/glamour/vitamin-g'),
-              (u'Margarita Shapes Up blog',
-               u'http://feeds.glamour.com/glamour/margaritashapesup'),
-              (u'Little Miss Fortune blog',
-               u'http://feeds.glamour.com/glamour/little-miss-fortune'),
+        (u'Smitten blog',
+         u'http://feeds.glamour.com/glamour/smitten'),
+        (u'Save the Date',
+         u'http://feeds.feedburner.com/glamour/save-the-date'),
+        (u'Save the Date',
+         u'http://feeds.feedburner.com/glamour/save-the-date'),
+        (u'Vitamin G blog',
+         u'http://feeds.glamour.com/glamour/vitamin-g'),
    ]
--- a/recipes/greensboro_news_and_record.recipe
+++ b/recipes/greensboro_news_and_record.recipe
@@ -19,29 +19,19 @@ class NewsandRecord(BasicNewsRecipe):
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True
+    auto_cleanup = True

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }

-    remove_tags_before = dict(name='h3', attrs={'class': 'nrcTxt_headline'})
-    remove_tags_after = dict(name='div', attrs={'id': 'nrcBlk_ContentBody'})
-
-    remove_tags = [
-        dict(name='iframe'),
-        dict(name=['notags', 'embed', 'object', 'link', 'img']),
-
-    ]
-
    feeds = [
-        ('News', 'http://www.news-record.com/news/archive/feed'),
-        ('Greensboro News', 'http://www.news-record.com/news/greensboro/feed'),
-        ('Education', 'http://www.news-record.com/news/education/feed'),
-        ('Government', 'http://www.news-record.com/news/government/feed'),
-        ('College Sports', 'http://www.news-record.com/sports/college/feed'),
-        ('Sports Extra', 'http://www.news-record.com/blog/sportsextra/feed'),
-        ('Life', 'http://www.news-record.com/life/top/feed'),
-        ('NASCAR', 'http://www.news-record.com/sports/nascar/top/feed'),
-        ('Editorials', 'http://www.news-record.com/opinion/editorials/feed'),
-        ('Letters to the Editor', 'http://www.news-record.com/opinion/letters/feed')
+        ('News', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=news,news/*&f=rss'),
+        ('Greensboro News', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/local,news/crime,news/goverment,news/schools,news/rockingham_county,news/local,news/crime,news/goverment,news/schools,news/rockingham_county/*&f=rss'),
+        ('Business', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=business,business/*&f=rss'),
+        ('Local Business', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=business/local_business,business/local_business/*&f=rss'),
+        ('Sports', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=sports,sports/*&f=rss'),
+        ('College Sports', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=sports/college,sports/college/*&f=rss'),
+        ('Sports Extra', 'http://www.greensboro.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=blogs/sports_extra,blogs/sports_extra/*&f=rss'),
+        ('Life', 'http://www.greensboro.com/search/?q=&t=article&l=10&d=&d1=&d2=&s=start_time&sd=desc&c[]=life,life/*&f=rss'),
    ]
--- a/recipes/hartford_courant.recipe
+++ b/recipes/hartford_courant.recipe
@@ -12,36 +12,11 @@ class ChicagoTribune(BasicNewsRecipe):
    __author__ = 'Being and Sujata Raman'
    description = 'Politics, local and business news from Hartford'
    language = 'en'
-
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
+    auto_cleanup = True

-    keep_only_tags = [dict(name='div', attrs={'class': ["story", "entry-asset asset hentry"]}),
-                      dict(name='div', attrs={
-                           'id': ["pagebody", "story", "maincontentcontainer"]}),
-                      ]
-    remove_tags_after = [{'class': ['photo_article', ]}]
-
-    remove_tags = [
-        {'id': ["moduleArticleTools", "content-bottom", "rail", "articleRelates module", "toolSet", "relatedrailcontent", "div-wrapper", "beta", "atp-comments", "footer"]},  # noqa
-        {'class': ["clearfix", "relatedTitle", "articleRelates module", "asset-footer", "tools", "comments",
-                              "featurePromo", "featurePromo fp-topjobs brownBackground", "clearfix fullSpan brownBackground", "curvedContent"]},
-        dict(name='font', attrs={'id': ["cr-other-headlines"]})]
-    extra_css = '''
-                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
-                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
-                    .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
-                    .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
-                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
-                    .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
-                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
-    '''
    feeds = [
        ('Breaking News', 'http://feeds.feedburner.com/courant-breaking-news/'),
        ('Nation/World News', 'http://feeds.feedburner.com/courant-nation-world/'),
@@ -64,29 +39,9 @@ class ChicagoTribune(BasicNewsRecipe):
        ('Music', 'http://feeds.feedburner.com/courant-music/'),
        ('TV', 'http://feeds.feedburner.com/courant-tv/'),
        ('Movies', 'http://feeds.feedburner.com/courant-movies/'),
-        # ('Metromix headlines', 'http://feeds.feedburner.com/metromix/topheadlines/'),
-        # ('Metromix events', 'http://feeds.feedburner.com/metromix/events/'),
-        # ('Metromix restaurants', 'http://feeds.feedburner.com/metromix/restaurants/'),
        ('Outdoors', 'http://feeds.feedburner.com/courant-outdoors/'),
        ('Peter Marteka', 'http://feeds.feedburner.com/courant-marteka-column/'),
-        ('Susan Campbell', 'http://feeds.feedburner.com/courant-campbell-column/'),
-        ('Helen Ubinas', 'http://feeds.feedburner.com/courant-helen-ubinas-column/'),
        ('Jim Shea', 'http://feeds.feedburner.com/courant-jim-shea-column/'),
        ('Tom Condon', 'http://feeds.feedburner.com/courant-tom-condon-column/'),
        ('Colin McEnroe', 'http://feeds.feedburner.com/courant-colin-mcenroe-column/'),
    ]
-
-    def get_article_url(self, article):
-        print article.get('feedburner_origlink', article.get('guid', article.get('link')))
-        return article.get('feedburner_origlink', article.get('guid', article.get('link')))
-
-    def postprocess_html(self, soup, first_fetch):
-        for t in soup.findAll(['table', 'tr', 'td']):
-            t.name = 'div'
-
-        for tag in soup.findAll('form', dict(attrs={'name': ["comments_form"]})):
-            tag.extract()
-        for tag in soup.findAll('font', dict(attrs={'id': ["cr-other-headlines"]})):
-            tag.extract()
-
-        return soup
--- a/recipes/icons/digital_arts.png
+++ b/recipes/icons/digital_arts.png
--- a/recipes/icons/heritage_foundation.png
+++ b/recipes/icons/heritage_foundation.png