Merge branch 'master' of https://github.com/CoderAllan/calibre

2025-07-09 03:04:10 -04:00 · 2016-10-14 09:12:25 +05:30 · 2016-10-14 09:12:25 +05:30 · 3def2109c0
commit 3def2109c0
parent 3b644906a4 3b21bf95ce
20 changed files with 137 additions and 494 deletions
--- a/recipes/moscow_times.recipe
+++ b/recipes/moscow_times.recipe
@@ -19,54 +19,14 @@ class Moscowtimes(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
-    encoding = 'cp1251'
-    masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif'
    publication_type = 'newspaper'
+    auto_cleanup = True

-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
-    }
-
-    extra_css      = '''
-                        h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
-                        .article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;}
-                        .autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
-                        .photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
-                        .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
-                        '''
    feeds = [
-
-    (u'Top Stories', u'http://www.themoscowtimes.com/rss/top'),
-    (u'Current Issue', u'http://www.themoscowtimes.com/rss/issue'),
-    (u'News', u'http://www.themoscowtimes.com/rss/news'),
-    (u'Business', u'http://www.themoscowtimes.com/rss/business'),
-    (u'Art and Ideas', u'http://www.themoscowtimes.com/rss/art'),
-    (u'Opinion', u'http://www.themoscowtimes.com/rss/opinion')
+        (u'Top Stories', u'https://themoscowtimes.com/feeds/main.xml'),
+        (u'Moscow', u'https://themoscowtimes.com/feeds/moscow.xml'),
+        (u'Russia', u'https://themoscowtimes.com/feeds/russia.xml'),
+        (u'World', u'https://themoscowtimes.com/feeds/world.xml'),
+        (u'Business', u'https://themoscowtimes.com/feeds/business.xml'),
+        (u'Opinion', u'https://themoscowtimes.com/feeds/opinion.xml')
    ]
-
-    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
-    remove_tags = [
-        dict(name='div', attrs={'class': ['photo_nav', 'phototext']}), dict(
-            name=['iframe', 'meta', 'base', 'link', 'embed', 'object'])
-    ]
-
-    def preprocess_html(self, soup):
-        for lnk in soup.findAll('a'):
-            if lnk.string is not None:
-                ind = self.tag_to_string(lnk)
-                lnk.replaceWith(ind)
-        return soup
-
-    def print_version(self, url):
-        return url.replace('.themoscowtimes.com/', '.themoscowtimes.com/print/')
-
-    def get_cover_url(self):
-        cover_url = None
-        href = 'http://www.themoscowtimes.com/pdf/'
-        soup = self.index_to_soup(href)
-        div = soup.find('div', attrs={'class': 'left'})
-        if div:
-            a = div.find('a')
-            if a:
-                cover_url = 'http://www.themoscowtimes.com' + a.img['src']
-        return cover_url
--- a/recipes/newsstraitstimes.recipe
+++ b/recipes/newsstraitstimes.recipe
@@ -1,4 +1,3 @@
-
 __license__ = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
@@ -11,7 +10,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Newstraitstimes(BasicNewsRecipe):
    title = 'New Straits Times from Malaysia'
    __author__ = 'Darko Miletic'
-    description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
+    description = ('Learning Curve, Sunday People, '
+                   'New Straits Times from Malaysia')
    publisher = 'nst.com.my'
    category = 'news, politics, Malaysia'
    oldest_article = 2
@@ -20,13 +20,6 @@ class Newstraitstimes(BasicNewsRecipe):
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
-    masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
+    auto_cleanup = True

-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
-    }
-
-    remove_tags = [dict(name=['link', 'table'])]
-    keep_only_tags = dict(name='div', attrs={'id': 'haidah'})
-
-    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
+    feeds = [(u'Articles', u'http://www.nst.com.my/latest.xml')]
--- a/recipes/oldnewthing.recipe
+++ b/recipes/oldnewthing.recipe
@@ -19,14 +19,6 @@ class OldNewThing(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    publication_type = 'blog'
-    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} .code{font-family: "Lucida Console",monospace} '
+    auto_cleanup = True

-    conversion_options = {
-        'comment': description, 'tags': 'blog, windows, microsoft, programming', 'publisher': 'Raymond Chen', 'language': language
-    }
-
-    remove_attributes = ['width', 'height']
-    keep_only_tags = [dict(attrs={'class': 'full-post'})]
-    remove_tags = [
-        dict(attrs={'class': ['post-attributes', 'post-tags', 'post-actions']})]
-    feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]
+    feeds = [(u'Posts', u'https://blogs.msdn.microsoft.com/oldnewthing/feed')]
--- a/recipes/pc_advisor.recipe
+++ b/recipes/pc_advisor.recipe
@@ -32,56 +32,16 @@ class pcAdvisor(BasicNewsRecipe):

    remove_javascript = True
    no_stylesheets = True
-
-    keep_only_tags = [
-        dict(name='div', attrs={'id': 'articlecontent'})
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'id': ['crosssitesignup', 'submitarticle', 'dontPrint',
-                                       'commentsForm', 'userReviewFormContainer', 'reevooContainerId']}),
-        dict(name='div', attrs={'class': 'mpu'}),
-        dict(name='p', attrs={'id': 'articlePageList'}),
-        dict(name='div', attrs={
-             'style': ['margin: 0pt 10px 5px;', 'margin: 0pt 10px 5px;']}),
-        dict(name='p', attrs={'class': 'dontPrint'}),
-        dict(name='h2', attrs={'class': 'sectionTitle'}),
-        dict(name='a', attrs={'title': 'Subscribe to PC Advisor'}),
-        dict(name='a', attrs={'name': 'revooContent'}),
-        {'name': ['form', 'script', 'link']}
-    ]
-
-    remove_tags_after = [
-        dict(name='p', attrs={'id': 'crosssitesignup'})
-    ]
-
-    def get_article_url(self, article):
-        return article.get('guid',  None)
+    auto_cleanup = True

    feeds = [
-        (u'News Headlines', u'http://www.pcadvisor.co.uk/rss/feeds/pcanews.xml'),
-        (u'Reviews', u'http://www.pcadvisor.co.uk/rss/feeds/pcareviews.xml'),
-        (u'New Products',
-         u'http://www.pcadvisor.co.uk/rss/feeds/blog18.xml'),
-        (u'PC Advisor Blog',
-         u'http://www.pcadvisor.co.uk/rss/feeds/blog4.xml'),
-        (u'PC Security',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-security.xml'),
-        (u'Laptops', u'http://www.pcadvisor.co.uk/rss/feeds/pca-laptop.xml'),
-        (u'Green Computing',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-green-computing.xml'),
-        (u'Internet and broadband',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-internet.xml'),
-        (u'Prones and PDAs',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-phones.xml'),
-        (u'Software', u'http://www.pcadvisor.co.uk/rss/feeds/pca-software.xml'),
-        (u'Small Business',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-small-business.xml'),
-        (u'Photo and video',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-photo-video.xml'),
-        (u'Mac News', u'http://www.pcadvisor.co.uk/rss/feeds/pca-mac.xml'),
-        (u'Linux', u'http://www.pcadvisor.co.uk/rss/feeds/pca-linux.xml'),
-        (u'WiFi and Networking',
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-networking.xml'),
-        (u'Gadgets', u'http://www.pcadvisor.co.uk/rss/feeds/pca-gadgets.xml')
+        (u'Latest', u'http://www.pcadvisor.co.uk/latest/rss'),
+        (u'News', u'http://www.pcadvisor.co.uk/news/rss'),
+        (u'How-tos', u'http://www.pcadvisor.co.uk/how-to/rss'),
+        (u'Reviews', u'http://www.pcadvisor.co.uk/review/rss'),
+        (u'Video Content', u'http://www.pcadvisor.co.uk/video/rss'),
+        (u'iPhone', u'http://www.pcadvisor.co.uk/latest/iphone/rss'),
+        (u'iPad', u'http://www.pcadvisor.co.uk/latest/ipad/rss'),
+        (u'Mac', u'http://www.pcadvisor.co.uk/latest/mac/rss'),
+        (u'Apple', u'http://www.pcadvisor.co.uk/latest/apple/rss'),
    ]
--- a/recipes/phys_org.recipe
+++ b/recipes/phys_org.recipe
@@ -19,8 +19,6 @@ class HindustanTimes(BasicNewsRecipe):
         'http://phys.org/rss-feed/physics-news/'),
        ('Space and Earth',
         'http://phys.org/rss-feed/space-news/'),
-        ('Electronics',
-         'http://phys.org/rss-feed/electronics-news/'),
        ('Chemistry',
         'http://phys.org/rss-feed/chemistry-news/'),
        ('Biology',
--- a/recipes/politiken_dk.recipe
+++ b/recipes/politiken_dk.recipe
@@ -22,14 +22,17 @@ class Politiken_dk(BasicNewsRecipe):
    encoding = 'cp1252'
    language = 'da'

-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h1{font-family: Georgia,"Times New Roman",Times,serif } '
+    extra_css = (' body{font-family: Arial,Helvetica,sans-serif } '
+                 'h1{font-family: Georgia,"Times New Roman",Times,serif } ')

    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
+        'comment': description,
+        'tags': category,
+        'publisher': publisher,
+        'language': language
    }

    feeds = [
-
        (u'Tophistorier', u'http://politiken.dk/rss/tophistorier.rss'),
        (u'Seneste nyt', u'http://politiken.dk/rss/senestenyt.rss'),
        (u'Mest laeste', u'http://politiken.dk/rss/mestlaeste.rss'),
@@ -41,7 +44,6 @@ class Politiken_dk(BasicNewsRecipe):
        (u'Kultur', u'http://politiken.dk/rss/kultur.rss'),
        (u'Sport', u'http://politiken.dk/rss/sport.rss'),
        (u'Uddannelse', u'http://politiken.dk/rss/uddannelse.rss'),
-    (u'Videnskab', u'http://politiken.dk/rss/videnskab.rss')
    ]
    remove_tags_before = dict(name='h1')
    remove_tags = [
--- a/recipes/rollingstone.recipe
+++ b/recipes/rollingstone.recipe
@@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 rollingstone.com
 '''

-import re
 from calibre.web.feeds.news import BasicNewsRecipe


@@ -22,44 +21,8 @@ class RollingStone(BasicNewsRecipe):
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
-    masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
-    extra_css             = """
-                               body{font-family: Georgia,Times,serif }
-                               img{margin-bottom: 0.4em; display:block}
-                            """
-
-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
-    }
-
-    preprocess_regexps = [
-        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL | re.IGNORECASE), lambda match: 'xml:lang="en">\n<head>\n'), (re.compile(
-            r'</title>.*?</head>', re.DOTALL | re.IGNORECASE), lambda match: '</title>\n</head>\n')
-    ]
-
-    keep_only_tags = [
-        dict(attrs={'class': ['headerImgHolder', 'headerContent']}), dict(name='div', attrs={'id': [
-            'teaser', 'storyTextContainer']}), dict(name='div', attrs={'class': 'blogDetailModule clearfix'})
-    ]
-
-    remove_tags = [
-        dict(name=['meta', 'iframe', 'object', 'embed']), dict(
-            attrs={'id': 'mpStoryHeader'}), dict(attrs={'class': 'relatedTopics'})
-    ]
-    remove_attributes = ['lang', 'onclick', 'width', 'height', 'name']
-    remove_tags_before = dict(attrs={'class': 'bloggerInfo'})
-    remove_tags_after = dict(attrs={'class': 'relatedTopics'})
+    auto_cleanup = True

    feeds = [
-
        (u'All News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
-    (u'All Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
-    (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
-    (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
-    (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews')
    ]
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
--- a/recipes/rstones.recipe
+++ b/recipes/rstones.recipe
@@ -1,45 +0,0 @@
-#!/usr/bin/env  python2
-__license__ = 'GPL v3'
-__author__ = 'Tony Stegall'
-__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
-__version__ = 'v1.01'
-__date__ = '07, October 2010'
-__description__ = 'Rolling Stones Mag'
-
-'''
-http://www.rollingstone.com
-'''
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class RollingStones(BasicNewsRecipe):
-    __author__ = 'Tony Stegall'
-    description = 'Rolling Stones Mag'
-    cover_url = 'http://gallery.celebritypro.com/data/media/648/kid-rock-rolling-stone-cover.jpg'
-    masthead_url = 'http://origin.myfonts.com/s/ec/cc-200804/Rolling_Stone-logo.gif'
-
-    title = 'Rolling Stones Mag'
-    category = 'Music Reviews, Movie Reviews, entertainment news'
-
-    language = 'en'
-    timefmt = '[%a, %d %b, %Y]'
-
-    oldest_article = 15
-    max_articles_per_feed = 25
-    use_embedded_content = False
-    no_stylesheets = True
-    auto_cleanup = True
-
-    feeds = [
-        (u'News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
-        (u'Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
-        (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
-        (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
-        (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews'),
-
-
-    ]
-
-    def print_version(self, url):
-        return url + '?print=true'
--- a/recipes/sfbg.recipe
+++ b/recipes/sfbg.recipe
@@ -14,6 +14,5 @@ class SanFranciscoBayGuardian(BasicNewsRecipe):
    ]

    feeds = [
-        ('sfbg', 'http://www.sfbg.com/rss.xml'),
+        ('sfbg', 'http://www.sfbg.com/feed/'),
    ]
-
--- a/recipes/shacknews.recipe
+++ b/recipes/shacknews.recipe
@@ -9,23 +9,9 @@ class Shacknews(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'en'
-
    no_stylesheets = True
-    remove_tags = [dict(name='div', attrs={'class': ['nuggets', 'comments']}),
-                   dict(name='p', attrs={'class': 'videoembed'})]
-    keep_only_tags = [dict(name='div', attrs={'class': 'story'})]
+    auto_cleanup = True
+
    feeds = [
-        (u'Latest News', u'http://feed.shacknews.com/shackfeed.xml'),
-        (u'PC', u'http://feed.shacknews.com/extras/tag_rss.x/PC'),
-        (u'Wii', u'http://feed.shacknews.com/extras/tag_rss.x/Nintendo+Wii'),
-        (u'Xbox 360', u'http://feed.shacknews.com/extras/tag_rss.x/Xbox+360'),
-        (u'Playstation 3',
-         u'http://feed.shacknews.com/extras/tag_rss.x/PlayStation+3'),
-        (u'PSP', u'http://feed.shacknews.com/extras/tag_rss.x/PSP'),
-        (u'Nintendo DS', u'http://feed.shacknews.com/extras/tag_rss.x/Nintendo+DS'),
-        (u'iPhone', u'http://feed.shacknews.com/extras/tag_rss.x/iPhone'),
-        (u'DLC', u'http://feed.shacknews.com/extras/tag_rss.x/DLC'),
-        (u'Valve', u'http://feed.shacknews.com/extras/tag_rss.x/Valve'),
-        (u'Electronic Arts',
-         u'http://feed.shacknews.com/extras/tag_rss.x/Electronic+Arts')
+        (u'Latest News', u'http://www.shacknews.com/shackfeed.xml'),
    ]
--- a/recipes/staradvertiser.recipe
+++ b/recipes/staradvertiser.recipe
@@ -14,69 +14,19 @@ class Starbulletin(BasicNewsRecipe):
    publisher = 'Honolulu Star-Advertiser'
    category = 'news, Honolulu, Hawaii'
    oldest_article = 2
-    needs_subscription = True
    max_articles_per_feed = 100
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    publication_type = 'newspaper'
-    masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
-#    extra_css             = """
-#                                body{font-family: Verdana,Arial,Helvetica,sans-serif}
-#                                h1,.brown,.hsa_postCredit{color: #663300}
-#                                .storyDeck{font-size: 1.2em; font-weight: bold}
-#                                img{display: block}
-#                            """
-
-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
-    }
-    keep_only_tags = [
-        dict(attrs={'id': 'hsa_storyTitle'}), dict(attrs={'id': 'hsa_storyTitle article-important'}), dict(attrs={'class': ['hsa_dateStamp', 'hsa_postCredit', 'storyDeck']}), dict(name='span', attrs={'class': ['hsa_dateStamp', 'hsa_postCredit']}), dict(name='span', attrs={'class': ['hsa_dateStamp article-important', 'hsa_postCredit article-important']}), dict(name='div', attrs={'class': 'storytext article-important'}), dict(name='div', attrs={'class': 'storytext'})  # noqa
-    ]
-    remove_tags = [
-        # removed 'span' from preceding list to permit keeping of author and
-        dict(name=['object', 'link', 'script', 'meta',
-                   'base', 'iframe'])        # timestamp
-        , dict(attrs={'class': ['insideStoryImage', 'insideStoryAd']}), dict(attrs={'name': 'fb_share'})
-    ]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open('http://www.staradvertiser.com/manage/Login/')
-            br.select_form(name='loginForm')
-            br['email'] = self.username
-            br['password'] = self.password
-            br.submit()
-        return br
+    auto_cleanup = True

    feeds = [
-
-    (u'Breaking News', u'http://www.staradvertiser.com/news/breaking/index.rss'),
-    (u'News', u'http://www.staradvertiser.com/newspremium/index.rss'),
-    (u'Business', u'http://www.staradvertiser.com/businesspremium/index.rss'),
-    (u'Sports', u'http://www.staradvertiser.com/sportspremium/index.rss'),
-    (u'Features', u'http://www.staradvertiser.com/featurespremium/index.rss')
+        (u'Breaking News',
+         u'http://www.staradvertiser.com/category/breaking-news/feed/'),
+        (u'Business', u'http://www.staradvertiser.com/business/feed/'),
+        (u'Sports', u'http://www.staradvertiser.com/sports/feed/'),
+        (u'Features',
+         u'http://www.staradvertiser.com/featurespremium/index.rss')
    ]
-
-    def preprocess_html(self, soup):
-        for item in soup.findAll(style=True):
-            del item['style']
-        for item in soup.findAll('a'):
-            limg = item.find('img')
-            if item.string is not None:
-                str = item.string
-                item.replaceWith(str)
-            else:
-                if limg:
-                    item.name = 'div'
-                    item.attrs = []
-                else:
-                    str = self.tag_to_string(item)
-                    item.replaceWith(str)
-        for item in soup.findAll('img'):
-            if not item.has_key('alt'):  # noqa
-                item['alt'] = 'image'
-        return soup
--- a/recipes/tech_world.recipe
+++ b/recipes/tech_world.recipe
@@ -10,17 +10,16 @@ http://www.techworld.com/
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ptempfile import PersistentTemporaryFile


 class techworld(BasicNewsRecipe):
    __author__ = 'Lorenzo Vigentini'
    description = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'  # noqa
-    cover_url = 'http://www.techworld.com/graphics/header/site_logo.jpg'

    title = 'TechWorld'
    publisher = 'IDG Communication'
-    category = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
+    category = ('Apple, Mac, video, computing, product reviews, '
+                'editing, cameras, production')

    language = 'en'
    timefmt = '[%a, %d %b, %Y]'
@@ -32,60 +31,16 @@ class techworld(BasicNewsRecipe):

    remove_javascript = True
    no_stylesheets = True
-
-    temp_files = []
-    articles_are_obfuscated = True
-
-    def get_obfuscated_article(self, url):
-        br = self.get_browser()
-        br.open(url)
-        response = br.follow_link(url_regex='?getDynamicPage&print$', nr=0)
-        html = response.read()
-        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-        self.temp_files[-1].write(html)
-        self.temp_files[-1].close()
-        return self.temp_files[-1].name
-
-    keep_only_tags = [
-        dict(name='div', attrs={'id': 'articleBody'}),
-        dict(name='h2', attrs={'class': 'blogTitle'}),
-        dict(name='h3', attrs={'class': 'blogger'}),
-    ]
-
-    remove_tags = [
-        dict(name='div', attrs={'class': ['submissionBar', 'mpuContainer']}),
-        dict(name='div', attrs={'id': [
-             'breadcrumb', 'mainContentSidebar', 'articleIconsList', 'loginSubscribeBoxout']}),
-        dict(name='ul', attrs={'class': 'articleIconsList'})
-    ]
-    remove_tags_after = [
-        dict(name='div', attrs={'id': 'articleFooter'})
-    ]
+    auto_cleanup = True

    feeds = [
-        (u'News', u'http://www.techworld.com/rss/feeds/techworld-news.xml'),
-        (u'How-Tos', u'http://www.techworld.com/rss/feeds/techworld-how-tos.xml'),
-        (u'Reviews', u'http://www.techworld.com/rss/feeds/techworld-reviews.xml'),
-        (u'Features', u'http://www.techworld.com/rss/feeds/techworld-features.xml'),
-        (u'Storage', u'http://www.techworld.com/rss/feeds/techworld-storage.xml'),
-        (u'Applications',
-         u'http://www.techworld.com/rss/feeds/techworld-applications.xml'),
-        (u'Virtualization',
-         u'http://www.techworld.com/rss/feeds/techworld-virtualisation.xml'),
-        (u'Personal Tech',
-         u'http://www.techworld.com/rss/feeds/techworld-personal-tech.xml'),
-        (u'Green IT', u'http://www.techworld.com/rss/feeds/techworld-green-it.xml'),
-        (u'Security', u'http://www.techworld.com/rss/feeds/techworld-security.xml'),
-        (u'Operating Systems',
-         u'http://www.techworld.com/rss/feeds/techworld-operating-systems.xml'),
-        (u'Networking', u'http://www.techworld.com/rss/feeds/techworld-networking.xml'),
-        (u'Mobile and Wireless',
-         u'http://www.techworld.com/rss/feeds/techworld-mobile-wireless.xml'),
-        (u'Data Centre', u'http://www.techworld.com/rss/feeds/techworld-data-centre.xml'),
-        (u'SME', u'http://www.techworld.com/rss/feeds/techworld-sme.xml'),
-        (u'TechWorld Blogs', u'http://blogs.techworld.com/atom.xml')
+        (u'News', u'http://www.techworld.com/news/rss'),
+        (u'Tutorial', u'http://www.techworld.com/tutorial/rss'),
+        (u'Reviews', u'http://www.techworld.com/review/rss'),
+        (u'Features', u'http://www.techworld.com/features/rss'),
+        (u'Analysis', u'http://www.techworld.com/analysis/rss'),
+        (u'Galleries',
+         u'http://www.techworld.com/picture-gallery/rss'),
+        (u'TechWorld Blogs',
+         u'http://www.techworld.com/blog/rss'),
    ]
-
-    extra_css = '''
-                img {align:left;}
-                '''
--- a/recipes/technology_review.recipe
+++ b/recipes/technology_review.recipe
@@ -18,12 +18,14 @@ class TechnologyReview(BasicNewsRecipe):
    .subheadline {font: italic large}
    """
    feeds = [
-        (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
-        (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
-        (u'Communications',
-         u'http://feeds.technologyreview.com/technology_review_Communications'),
-        (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
-        (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
-        (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
-        (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
+        (u'Computing',
+         u'http://feeds.technologyreview.com/technology_review_Computing'),
+        (u'Energy',
+         u'http://feeds.technologyreview.com/technology_review_Energy'),
+        (u'Materials',
+         u'http://feeds.technologyreview.com/technology_review_Materials'),
+        (u'Biomedicine',
+         u'http://feeds.technologyreview.com/technology_review_Biotech'),
+        (u'Business',
+         u'http://feeds.technologyreview.com/technology_review_Biztech')
    ]
--- a/recipes/the_budget_fashionista.recipe
+++ b/recipes/the_budget_fashionista.recipe
@@ -22,20 +22,8 @@ class TheBudgetFashionista(BasicNewsRecipe):
    category = 'news, fashion, comsetics, women'
    lang = 'en-US'
    language = 'en'
+    auto_cleanup = True

-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': lang
-    }
-
-    keep_only_tags = [dict(name='div', attrs={'class': 'columnLeft'})]
-    remove_tags_after = dict(name='div', attrs={'class': 'postDetails'})
-    remove_tags = [dict(name=['object', 'link', 'script',
-                              'iframe', 'form', 'login-button'])]
-
-    feeds = [(u'Articles', u'http://www.thebudgetfashionista.com/feeds/atom/')]
-
-    def preprocess_html(self, soup):
-        for it in soup.findAll('img'):
-            if it.parent.name == 'a':
-                it.parent.name = 'div'
-        return soup
+    feeds = [(u'Articles',
+              u'http://feeds.feedburner.com/TheBudgetFashionista')
+    ]
--- a/recipes/the_week_magazine_free.recipe
+++ b/recipes/the_week_magazine_free.recipe
@@ -23,8 +23,5 @@ class TheWeek(BasicNewsRecipe):
    language = 'en'
    auto_cleanup = True
    feeds = [
-        (u'News-Opinion', u'http://theweek.com/section/index/news_opinion.rss'),
-        (u'Business', u'http://theweek.com/section/index/business.rss'),
-        (u'Arts-Life', u'http://theweek.com/section/index/arts_life.rss'),
-        (u'Cartoons', u'http://theweek.com/section/index/cartoon_wit/0/all-cartoons.rss')
+        (u'Latest articles', u'http://theweek.com/rss.xml'),
    ]
--- a/recipes/usatoday.recipe
+++ b/recipes/usatoday.recipe
@@ -14,7 +14,6 @@ class USAToday(BasicNewsRecipe):
    title = 'USA Today'
    __author__ = 'Kovid Goyal'
    description = 'newspaper'
-    cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/USAT.jpg'
    encoding = 'utf-8'
    publisher = 'usatoday.com'
    category = 'news, usa'
@@ -28,25 +27,42 @@ class USAToday(BasicNewsRecipe):
    filterDuplicates = True

    extra_css = '''
-                    h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
-                    #post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
-                    #post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
+                    h1, h2 {
+                        font-size:xx-large;
+                        font-family:Arial,Helvetica,sans-serif;}
+                    #post-attributes, .info,
+                    .clear {
+                        font-size:xx-small; color:#4D4D4D;
+                        font-family:Arial,Helvetica,sans-serif;
+                    }
+                    #post-body,
+                    #content {
+                        font-size:medium;
+                        font-family:Arial,Helvetica,sans-serif;
+                    }
                '''

    feeds = [
-        ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
-        ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
-        ('Personal Tech', 'http://rssfeeds.usatoday.com/UsatodaycomTech-PersonalTalk'),
-        ('Science', 'http://rssfeeds.usatoday.com/TP-ScienceFair'),
-        ('Health', 'http://rssfeeds.usatoday.com/UsatodaycomHealth-TopStories'),
-        ('Travel Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomTravel-TopStories'),
-        ('Money Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomMoney-TopStories'),
+        ('Top Headlines',
+         'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
+        ('Tech Headlines',
+         'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
+        ('Personal Tech',
+         'http://rssfeeds.usatoday.com/UsatodaycomTech-PersonalTalk'),
+        ('Health',
+         'http://rssfeeds.usatoday.com/UsatodaycomHealth-TopStories'),
+        ('Travel Headlines',
+         'http://rssfeeds.usatoday.com/UsatodaycomTravel-TopStories'),
+        ('Money Headlines',
+         'http://rssfeeds.usatoday.com/UsatodaycomMoney-TopStories'),
        ('Entertainment Headlines',
         'http://rssfeeds.usatoday.com/usatoday-LifeTopStories'),
-        ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
-        ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
-        ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
-        ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
+        ('Sport Headlines',
+         'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
+        ('Weather Headlines',
+         'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
+        ('Most Popular',
+         'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
    ]

    auto_cleanup = True
--- a/recipes/usnews.recipe
+++ b/recipes/usnews.recipe
@@ -20,40 +20,14 @@ class LaPrensa(BasicNewsRecipe):
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'
-
-    html2lrf_options = [
-        '--comment', description, '--category', category, '--publisher', publisher
-    ]
-
-    html2epub_options = 'publisher="' + publisher + \
-        '"\ncomments="' + description + '"\ntags="' + category + '"'
-
-    keep_only_tags = [
-        dict(name='h1'), dict(name='div', attrs={'id': ['dateline']}), dict(
-            name='div', attrs={'class': ['blogCredit', 'body']})
-    ]
+    auto_cleanup = True

    feeds = [
-
        (u'Homepage', u'http://www.usnews.com/rss/usnews.rss'),
-    (u'Health', u'http://www.usnews.com/rss/health/index.rss'),
-    (u'Nation & World', u'http://www.usnews.com/rss/news/index.rss'),
-    (u'Money & Business', u'http://www.usnews.com/rss/business/index.rss'),
-    (u'Education', u'http://www.usnews.com/rss/education/index.rss'),
-    (u'Opinion', u'http://www.usnews.com/rss/opinion/index.rss'),
-    (u'Science', u'http://www.usnews.com/rss/science/index.rss')
+        (u'Health', u'http://www.usnews.com/rss/health'),
+        (u'Nation & World', u'http://www.usnews.com/rss/news'),
+        (u'Money & Business', u'http://www.usnews.com/rss/money'),
+        (u'Education', u'http://www.usnews.com/rss/education'),
+        (u'Opinion', u'http://www.usnews.com/rss/opinion'),
+        (u'Science', u'http://www.usnews.com/rss/science')
    ]
-
-    def print_version(self, url):
-        return url.replace('.html', '_print.html')
-
-    def get_article_url(self, article):
-        raw = article.get('link',  None)
-        artcl, sep, unneeded = raw.rpartition('?')
-        return artcl
-
-    def preprocess_html(self, soup):
-        del soup.body['onload']
-        for item in soup.findAll(style=True):
-            del item['style']
-        return soup
--- a/recipes/waco_tribune.recipe
+++ b/recipes/waco_tribune.recipe
@ -11,10 +11,10 @@ class AdvancedUserRecipe1278773519(BasicNewsRecipe):
    max_articles_per_feed = 100

    feeds = [
-        (u'News', u'http://www.wacotrib.com/news/index.rss2'),
-        (u'Sports', u'http://www.wacotrib.com/sports/index.rss2'),
-        (u'AccessWaco', u'http://www.wacotrib.com/accesswaco/index.rss2'),
-        (u'Opinions', u'http://www.wacotrib.com/opinion/index.rss2')
+        (u'News', u'http://www.wacotrib.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/ap_nation,news/ap_nation/*&f=rss'),
+        (u'Sports', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=sports*&f=rss'),
+        (u'AccessWaco', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=entertainment/accesswaco*&f=rss'),
+        (u'Opinions', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=opinion*&f=rss')
    ]

    remove_javascript = True
@ -23,13 +23,4 @@ class AdvancedUserRecipe1278773519(BasicNewsRecipe):
    language = 'en'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
-    masthead_url = 'http://media.wacotrib.com/designimages/wacotrib_logo.jpg'
-    keep_only_tags = [
-        dict(name='div', attrs={'class': 'twoColumn left'}),
-    ]
-    remove_tags = [
-        dict(name='div', attrs={'class': 'right blueLinks'}),
-    ]
-    remove_tags_after = [
-        dict(name='div', attrs={'class': 'dottedRule'}),
-    ]
+    auto_cleanup = True
--- a/recipes/wash_post.recipe
+++ b/recipes/wash_post.recipe
@ -4,7 +4,6 @@ __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 www.washingtonpost.com
 '''

-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe


@ -23,55 +22,18 @@ class TheWashingtonPost(BasicNewsRecipe):
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newspaper'
-    masthead_url = 'http://www.washingtonpost.com/rw/sites/twpweb/img/logos/twp_logo_300.gif'
-    cover_url = strftime(
-        'http://www.washingtonpost.com/rw/WashingtonPost/Content/Epaper/%Y-%m-%d/Ax1.pdf')
-    extra_css             = """
-                               body{font-family: Georgia,serif }
-                            """
-
-    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
-    }
-
-    keep_only_tags = [
-        dict(attrs={'id': ['content', 'entryhead', 'entrytext']})]
-    remove_tags = [
-        dict(name=['meta', 'link', 'iframe', 'base']), dict(
-            attrs={'id': 'multimedia-leaf-page'})
-    ]
-    remove_attributes = ['lang', 'property', 'epochtime',
-                         'datetitle', 'pagetype', 'contenttype', 'comparetime']
+    auto_cleanup = True

    feeds = [
-
        (u'World', u'http://feeds.washingtonpost.com/rss/world'),
        (u'National', u'http://feeds.washingtonpost.com/rss/national'),
-    (u'White House', u'http://feeds.washingtonpost.com/rss/politics/whitehouse'),
+        (u'White House',
+         u'http://feeds.washingtonpost.com/rss/politics/whitehouse'),
        (u'Business', u'http://feeds.washingtonpost.com/rss/business'),
        (u'Opinions', u'http://feeds.washingtonpost.com/rss/opinions'),
-    (u'Investigations', u'http://feeds.washingtonpost.com/rss/investigations'),
        (u'Local', u'http://feeds.washingtonpost.com/rss/local'),
-    (u'Entertainment', u'http://feeds.washingtonpost.com/rss/entertainment'),
+        (u'Entertainment',
+         u'http://feeds.washingtonpost.com/rss/entertainment'),
        (u'Sports', u'http://feeds.washingtonpost.com/rss/sports'),
        (u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'),
-    (u'Special Reports', u'http://feeds.washingtonpost.com/rss/national/special-reports')
    ]
-
-    def print_version(self, url):
-        if '_story.html' in url:
-            return url.replace('_story.html', '_print.html')
-        return url
-
-    def get_article_url(self, article):
-        link = BasicNewsRecipe.get_article_url(self, article)
-        if article.id.startswith('http'):
-            link = article.id
-        if 'washingtonpost.com' not in link:
-            self.log('Skipping ads:', link)
-            return None
-        for it in ['_video.html', '_gallery.html', '_links.html']:
-            if it in link:
-                self.log('Skipping non-article:', link)
-                return None
-        return link
--- a/recipes/worldcrunch.recipe
+++ b/recipes/worldcrunch.recipe
@ -14,5 +14,5 @@ class Worldcrunch(BasicNewsRecipe):

    feeds = [
        ('News',
-         'http://www.worldcrunch.com/feed'),
+         'http://www.worldcrunch.com/rss/rss.php'),
    ]