Merge branch 'master' of https://github.com/CoderAllan/calibre

2025-09-14 16:18:05 -04:00 · 2016-10-14 09:12:25 +05:30 · 2016-10-14 09:12:25 +05:30 · 3def2109c0
commit 3def2109c0
parent 3b644906a4 3b21bf95ce
20 changed files with 137 additions and 494 deletions
--- a/recipes/moscow_times.recipe
+++ b/recipes/moscow_times.recipe
@ -19,54 +19,14 @@ class Moscowtimes(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    remove_empty_feeds = True
    encoding = 'cp1251'
    masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif'
    publication_type = 'newspaper'
    auto_cleanup = True
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    extra_css      = '''
                        h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
                        .article_date{ font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; color:#000000; font-size: x-small;}
                        .autors{color:#999999 ; font-weight: bold ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
                        .photoautors{ color:#999999 ; font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size: x-small; }
                        .text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
                        '''
    feeds = [
-
+        (u'Top Stories', u'https://themoscowtimes.com/feeds/main.xml'),
-    (u'Top Stories', u'http://www.themoscowtimes.com/rss/top'),
+        (u'Moscow', u'https://themoscowtimes.com/feeds/moscow.xml'),
-    (u'Current Issue', u'http://www.themoscowtimes.com/rss/issue'),
+        (u'Russia', u'https://themoscowtimes.com/feeds/russia.xml'),
-    (u'News', u'http://www.themoscowtimes.com/rss/news'),
+        (u'World', u'https://themoscowtimes.com/feeds/world.xml'),
-    (u'Business', u'http://www.themoscowtimes.com/rss/business'),
+        (u'Business', u'https://themoscowtimes.com/feeds/business.xml'),
-    (u'Art and Ideas', u'http://www.themoscowtimes.com/rss/art'),
+        (u'Opinion', u'https://themoscowtimes.com/feeds/opinion.xml')
    (u'Opinion', u'http://www.themoscowtimes.com/rss/opinion')
    ]
    keep_only_tags = [dict(name='div', attrs={'id': 'content'})]
    remove_tags = [
        dict(name='div', attrs={'class': ['photo_nav', 'phototext']}), dict(
            name=['iframe', 'meta', 'base', 'link', 'embed', 'object'])
    ]
    def preprocess_html(self, soup):
        for lnk in soup.findAll('a'):
            if lnk.string is not None:
                ind = self.tag_to_string(lnk)
                lnk.replaceWith(ind)
        return soup
    def print_version(self, url):
        return url.replace('.themoscowtimes.com/', '.themoscowtimes.com/print/')
    def get_cover_url(self):
        cover_url = None
        href = 'http://www.themoscowtimes.com/pdf/'
        soup = self.index_to_soup(href)
        div = soup.find('div', attrs={'class': 'left'})
        if div:
            a = div.find('a')
            if a:
                cover_url = 'http://www.themoscowtimes.com' + a.img['src']
        return cover_url
--- a/recipes/newsstraitstimes.recipe
+++ b/recipes/newsstraitstimes.recipe
@ -1,4 +1,3 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
@ -11,7 +10,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class Newstraitstimes(BasicNewsRecipe):
    title = 'New Straits Times from Malaysia'
    __author__ = 'Darko Miletic'
-    description = 'Learning Curve, Sunday People, New Straits Times from Malaysia'
+    description = ('Learning Curve, Sunday People, '
                   'New Straits Times from Malaysia')
    publisher = 'nst.com.my'
    category = 'news, politics, Malaysia'
    oldest_article = 2
@ -20,13 +20,6 @@ class Newstraitstimes(BasicNewsRecipe):
    encoding = 'cp1252'
    use_embedded_content = False
    language = 'en'
-    masthead_url = 'http://www.nst.com.my/Current_News/NST/Images/new-nstonline.jpg'
+    auto_cleanup = True
-    conversion_options = {
+    feeds = [(u'Articles', u'http://www.nst.com.my/latest.xml')]
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    remove_tags = [dict(name=['link', 'table'])]
    keep_only_tags = dict(name='div', attrs={'id': 'haidah'})
    feeds = [(u'Articles', u'http://www.nst.com.my/rss/allSec')]
--- a/recipes/oldnewthing.recipe
+++ b/recipes/oldnewthing.recipe
@ -19,14 +19,6 @@ class OldNewThing(BasicNewsRecipe):
    no_stylesheets = True
    use_embedded_content = False
    publication_type = 'blog'
-    extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif} .code{font-family: "Lucida Console",monospace} '
+    auto_cleanup = True
-    conversion_options = {
+    feeds = [(u'Posts', u'https://blogs.msdn.microsoft.com/oldnewthing/feed')]
        'comment': description, 'tags': 'blog, windows, microsoft, programming', 'publisher': 'Raymond Chen', 'language': language
    }
    remove_attributes = ['width', 'height']
    keep_only_tags = [dict(attrs={'class': 'full-post'})]
    remove_tags = [
        dict(attrs={'class': ['post-attributes', 'post-tags', 'post-actions']})]
    feeds = [(u'Posts', u'http://blogs.msdn.com/oldnewthing/rss.xml')]
--- a/recipes/pc_advisor.recipe
+++ b/recipes/pc_advisor.recipe
@ -32,56 +32,16 @@ class pcAdvisor(BasicNewsRecipe):
    remove_javascript = True
    no_stylesheets = True
-
+    auto_cleanup = True
    keep_only_tags = [
        dict(name='div', attrs={'id': 'articlecontent'})
    ]
    remove_tags = [
        dict(name='div', attrs={'id': ['crosssitesignup', 'submitarticle', 'dontPrint',
                                       'commentsForm', 'userReviewFormContainer', 'reevooContainerId']}),
        dict(name='div', attrs={'class': 'mpu'}),
        dict(name='p', attrs={'id': 'articlePageList'}),
        dict(name='div', attrs={
             'style': ['margin: 0pt 10px 5px;', 'margin: 0pt 10px 5px;']}),
        dict(name='p', attrs={'class': 'dontPrint'}),
        dict(name='h2', attrs={'class': 'sectionTitle'}),
        dict(name='a', attrs={'title': 'Subscribe to PC Advisor'}),
        dict(name='a', attrs={'name': 'revooContent'}),
        {'name': ['form', 'script', 'link']}
    ]
    remove_tags_after = [
        dict(name='p', attrs={'id': 'crosssitesignup'})
    ]
    def get_article_url(self, article):
        return article.get('guid',  None)
    feeds = [
-        (u'News Headlines', u'http://www.pcadvisor.co.uk/rss/feeds/pcanews.xml'),
+        (u'Latest', u'http://www.pcadvisor.co.uk/latest/rss'),
-        (u'Reviews', u'http://www.pcadvisor.co.uk/rss/feeds/pcareviews.xml'),
+        (u'News', u'http://www.pcadvisor.co.uk/news/rss'),
-        (u'New Products',
+        (u'How-tos', u'http://www.pcadvisor.co.uk/how-to/rss'),
-         u'http://www.pcadvisor.co.uk/rss/feeds/blog18.xml'),
+        (u'Reviews', u'http://www.pcadvisor.co.uk/review/rss'),
-        (u'PC Advisor Blog',
+        (u'Video Content', u'http://www.pcadvisor.co.uk/video/rss'),
-         u'http://www.pcadvisor.co.uk/rss/feeds/blog4.xml'),
+        (u'iPhone', u'http://www.pcadvisor.co.uk/latest/iphone/rss'),
-        (u'PC Security',
+        (u'iPad', u'http://www.pcadvisor.co.uk/latest/ipad/rss'),
-         u'http://www.pcadvisor.co.uk/rss/feeds/pca-security.xml'),
+        (u'Mac', u'http://www.pcadvisor.co.uk/latest/mac/rss'),
-        (u'Laptops', u'http://www.pcadvisor.co.uk/rss/feeds/pca-laptop.xml'),
+        (u'Apple', u'http://www.pcadvisor.co.uk/latest/apple/rss'),
        (u'Green Computing',
         u'http://www.pcadvisor.co.uk/rss/feeds/pca-green-computing.xml'),
        (u'Internet and broadband',
         u'http://www.pcadvisor.co.uk/rss/feeds/pca-internet.xml'),
        (u'Prones and PDAs',
         u'http://www.pcadvisor.co.uk/rss/feeds/pca-phones.xml'),
        (u'Software', u'http://www.pcadvisor.co.uk/rss/feeds/pca-software.xml'),
        (u'Small Business',
         u'http://www.pcadvisor.co.uk/rss/feeds/pca-small-business.xml'),
        (u'Photo and video',
         u'http://www.pcadvisor.co.uk/rss/feeds/pca-photo-video.xml'),
        (u'Mac News', u'http://www.pcadvisor.co.uk/rss/feeds/pca-mac.xml'),
        (u'Linux', u'http://www.pcadvisor.co.uk/rss/feeds/pca-linux.xml'),
        (u'WiFi and Networking',
         u'http://www.pcadvisor.co.uk/rss/feeds/pca-networking.xml'),
        (u'Gadgets', u'http://www.pcadvisor.co.uk/rss/feeds/pca-gadgets.xml')
    ]
--- a/recipes/phys_org.recipe
+++ b/recipes/phys_org.recipe
@ -19,8 +19,6 @@ class HindustanTimes(BasicNewsRecipe):
         'http://phys.org/rss-feed/physics-news/'),
        ('Space and Earth',
         'http://phys.org/rss-feed/space-news/'),
        ('Electronics',
         'http://phys.org/rss-feed/electronics-news/'),
        ('Chemistry',
         'http://phys.org/rss-feed/chemistry-news/'),
        ('Biology',
--- a/recipes/politiken_dk.recipe
+++ b/recipes/politiken_dk.recipe
@ -22,26 +22,28 @@ class Politiken_dk(BasicNewsRecipe):
    encoding = 'cp1252'
    language = 'da'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } h1{font-family: Georgia,"Times New Roman",Times,serif } '
+    extra_css = (' body{font-family: Arial,Helvetica,sans-serif } '
                 'h1{font-family: Georgia,"Times New Roman",Times,serif } ')
    conversion_options = {
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
+        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language
    }
    feeds = [
-
+        (u'Tophistorier', u'http://politiken.dk/rss/tophistorier.rss'),
-    (u'Tophistorier', u'http://politiken.dk/rss/tophistorier.rss'),
+        (u'Seneste nyt', u'http://politiken.dk/rss/senestenyt.rss'),
-    (u'Seneste nyt', u'http://politiken.dk/rss/senestenyt.rss'),
+        (u'Mest laeste', u'http://politiken.dk/rss/mestlaeste.rss'),
-    (u'Mest laeste', u'http://politiken.dk/rss/mestlaeste.rss'),
+        (u'Danmark', u'http://politiken.dk/rss/indland.rss'),
-    (u'Danmark', u'http://politiken.dk/rss/indland.rss'),
+        (u'Politik', u'http://politiken.dk/rss/politik.rss'),
-    (u'Politik', u'http://politiken.dk/rss/politik.rss'),
+        (u'Klima', u'http://politiken.dk/rss/klima.rss'),
-    (u'Klima', u'http://politiken.dk/rss/klima.rss'),
+        (u'Internationalt', u'http://politiken.dk/rss/udland.rss'),
-    (u'Internationalt', u'http://politiken.dk/rss/udland.rss'),
+        (u'Erhverv', u'http://politiken.dk/rss/erhverv.rss'),
-    (u'Erhverv', u'http://politiken.dk/rss/erhverv.rss'),
+        (u'Kultur', u'http://politiken.dk/rss/kultur.rss'),
-    (u'Kultur', u'http://politiken.dk/rss/kultur.rss'),
+        (u'Sport', u'http://politiken.dk/rss/sport.rss'),
-    (u'Sport', u'http://politiken.dk/rss/sport.rss'),
+        (u'Uddannelse', u'http://politiken.dk/rss/uddannelse.rss'),
    (u'Uddannelse', u'http://politiken.dk/rss/uddannelse.rss'),
    (u'Videnskab', u'http://politiken.dk/rss/videnskab.rss')
    ]
    remove_tags_before = dict(name='h1')
    remove_tags = [
--- a/recipes/rollingstone.recipe
+++ b/recipes/rollingstone.recipe
@ -4,7 +4,6 @@ __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 rollingstone.com
 '''
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
@ -22,44 +21,8 @@ class RollingStone(BasicNewsRecipe):
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
-    masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
+    auto_cleanup = True
    extra_css             = """
                               body{font-family: Georgia,Times,serif }
                               img{margin-bottom: 0.4em; display:block}
                            """
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    preprocess_regexps = [
        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL | re.IGNORECASE), lambda match: 'xml:lang="en">\n<head>\n'), (re.compile(
            r'</title>.*?</head>', re.DOTALL | re.IGNORECASE), lambda match: '</title>\n</head>\n')
    ]
    keep_only_tags = [
        dict(attrs={'class': ['headerImgHolder', 'headerContent']}), dict(name='div', attrs={'id': [
            'teaser', 'storyTextContainer']}), dict(name='div', attrs={'class': 'blogDetailModule clearfix'})
    ]
    remove_tags = [
        dict(name=['meta', 'iframe', 'object', 'embed']), dict(
            attrs={'id': 'mpStoryHeader'}), dict(attrs={'class': 'relatedTopics'})
    ]
    remove_attributes = ['lang', 'onclick', 'width', 'height', 'name']
    remove_tags_before = dict(attrs={'class': 'bloggerInfo'})
    remove_tags_after = dict(attrs={'class': 'relatedTopics'})
    feeds = [
-
+        (u'All News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
    (u'All News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
    (u'All Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
    (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
    (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
    (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews')
    ]
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/recipes/rstones.recipe
+++ b/recipes/rstones.recipe
@ -1,45 +0,0 @@
 #!/usr/bin/env  python2
 __license__ = 'GPL v3'
 __author__ = 'Tony Stegall'
 __copyright__ = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
 __version__ = 'v1.01'
 __date__ = '07, October 2010'
 __description__ = 'Rolling Stones Mag'
 '''
 http://www.rollingstone.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class RollingStones(BasicNewsRecipe):
    __author__ = 'Tony Stegall'
    description = 'Rolling Stones Mag'
    cover_url = 'http://gallery.celebritypro.com/data/media/648/kid-rock-rolling-stone-cover.jpg'
    masthead_url = 'http://origin.myfonts.com/s/ec/cc-200804/Rolling_Stone-logo.gif'
    title = 'Rolling Stones Mag'
    category = 'Music Reviews, Movie Reviews, entertainment news'
    language = 'en'
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 15
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    auto_cleanup = True
    feeds = [
        (u'News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
        (u'Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
        (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
        (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
        (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews'),
    ]
    def print_version(self, url):
        return url + '?print=true'
--- a/recipes/sfbg.recipe
+++ b/recipes/sfbg.recipe
@ -14,6 +14,5 @@ class SanFranciscoBayGuardian(BasicNewsRecipe):
    ]
    feeds = [
-        ('sfbg', 'http://www.sfbg.com/rss.xml'),
+        ('sfbg', 'http://www.sfbg.com/feed/'),
    ]
--- a/recipes/shacknews.recipe
+++ b/recipes/shacknews.recipe
@ -9,23 +9,9 @@ class Shacknews(BasicNewsRecipe):
    oldest_article = 7
    max_articles_per_feed = 100
    language = 'en'
    no_stylesheets = True
-    remove_tags = [dict(name='div', attrs={'class': ['nuggets', 'comments']}),
+    auto_cleanup = True
-                   dict(name='p', attrs={'class': 'videoembed'})]
+
    keep_only_tags = [dict(name='div', attrs={'class': 'story'})]
    feeds = [
-        (u'Latest News', u'http://feed.shacknews.com/shackfeed.xml'),
+        (u'Latest News', u'http://www.shacknews.com/shackfeed.xml'),
        (u'PC', u'http://feed.shacknews.com/extras/tag_rss.x/PC'),
        (u'Wii', u'http://feed.shacknews.com/extras/tag_rss.x/Nintendo+Wii'),
        (u'Xbox 360', u'http://feed.shacknews.com/extras/tag_rss.x/Xbox+360'),
        (u'Playstation 3',
         u'http://feed.shacknews.com/extras/tag_rss.x/PlayStation+3'),
        (u'PSP', u'http://feed.shacknews.com/extras/tag_rss.x/PSP'),
        (u'Nintendo DS', u'http://feed.shacknews.com/extras/tag_rss.x/Nintendo+DS'),
        (u'iPhone', u'http://feed.shacknews.com/extras/tag_rss.x/iPhone'),
        (u'DLC', u'http://feed.shacknews.com/extras/tag_rss.x/DLC'),
        (u'Valve', u'http://feed.shacknews.com/extras/tag_rss.x/Valve'),
        (u'Electronic Arts',
         u'http://feed.shacknews.com/extras/tag_rss.x/Electronic+Arts')
    ]
--- a/recipes/staradvertiser.recipe
+++ b/recipes/staradvertiser.recipe
@ -14,69 +14,19 @@ class Starbulletin(BasicNewsRecipe):
    publisher = 'Honolulu Star-Advertiser'
    category = 'news, Honolulu, Hawaii'
    oldest_article = 2
    needs_subscription = True
    max_articles_per_feed = 100
    language = 'en'
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'
    publication_type = 'newspaper'
-    masthead_url = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
+    auto_cleanup = True
 #    extra_css             = """
 #                                body{font-family: Verdana,Arial,Helvetica,sans-serif}
 #                                h1,.brown,.hsa_postCredit{color: #663300}
 #                                .storyDeck{font-size: 1.2em; font-weight: bold}
 #                                img{display: block}
 #                            """
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language, 'linearize_tables': True
    }
    keep_only_tags = [
        dict(attrs={'id': 'hsa_storyTitle'}), dict(attrs={'id': 'hsa_storyTitle article-important'}), dict(attrs={'class': ['hsa_dateStamp', 'hsa_postCredit', 'storyDeck']}), dict(name='span', attrs={'class': ['hsa_dateStamp', 'hsa_postCredit']}), dict(name='span', attrs={'class': ['hsa_dateStamp article-important', 'hsa_postCredit article-important']}), dict(name='div', attrs={'class': 'storytext article-important'}), dict(name='div', attrs={'class': 'storytext'})  # noqa
    ]
    remove_tags = [
        # removed 'span' from preceding list to permit keeping of author and
        dict(name=['object', 'link', 'script', 'meta',
                   'base', 'iframe'])        # timestamp
        , dict(attrs={'class': ['insideStoryImage', 'insideStoryAd']}), dict(attrs={'name': 'fb_share'})
    ]
    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.staradvertiser.com/manage/Login/')
            br.select_form(name='loginForm')
            br['email'] = self.username
            br['password'] = self.password
            br.submit()
        return br
    feeds = [
-
+        (u'Breaking News',
-    (u'Breaking News', u'http://www.staradvertiser.com/news/breaking/index.rss'),
+         u'http://www.staradvertiser.com/category/breaking-news/feed/'),
-    (u'News', u'http://www.staradvertiser.com/newspremium/index.rss'),
+        (u'Business', u'http://www.staradvertiser.com/business/feed/'),
-    (u'Business', u'http://www.staradvertiser.com/businesspremium/index.rss'),
+        (u'Sports', u'http://www.staradvertiser.com/sports/feed/'),
-    (u'Sports', u'http://www.staradvertiser.com/sportspremium/index.rss'),
+        (u'Features',
-    (u'Features', u'http://www.staradvertiser.com/featurespremium/index.rss')
+         u'http://www.staradvertiser.com/featurespremium/index.rss')
    ]
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            limg = item.find('img')
            if item.string is not None:
                str = item.string
                item.replaceWith(str)
            else:
                if limg:
                    item.name = 'div'
                    item.attrs = []
                else:
                    str = self.tag_to_string(item)
                    item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):  # noqa
                item['alt'] = 'image'
        return soup
--- a/recipes/tech_world.recipe
+++ b/recipes/tech_world.recipe
@ -10,17 +10,16 @@ http://www.techworld.com/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.ptempfile import PersistentTemporaryFile
 class techworld(BasicNewsRecipe):
    __author__ = 'Lorenzo Vigentini'
    description = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'  # noqa
    cover_url = 'http://www.techworld.com/graphics/header/site_logo.jpg'
    title = 'TechWorld'
    publisher = 'IDG Communication'
-    category = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
+    category = ('Apple, Mac, video, computing, product reviews, '
                'editing, cameras, production')
    language = 'en'
    timefmt = '[%a, %d %b, %Y]'
@ -32,60 +31,16 @@ class techworld(BasicNewsRecipe):
    remove_javascript = True
    no_stylesheets = True
-
+    auto_cleanup = True
    temp_files = []
    articles_are_obfuscated = True
    def get_obfuscated_article(self, url):
        br = self.get_browser()
        br.open(url)
        response = br.follow_link(url_regex='?getDynamicPage&print$', nr=0)
        html = response.read()
        self.temp_files.append(PersistentTemporaryFile('_fa.html'))
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
        return self.temp_files[-1].name
    keep_only_tags = [
        dict(name='div', attrs={'id': 'articleBody'}),
        dict(name='h2', attrs={'class': 'blogTitle'}),
        dict(name='h3', attrs={'class': 'blogger'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': ['submissionBar', 'mpuContainer']}),
        dict(name='div', attrs={'id': [
             'breadcrumb', 'mainContentSidebar', 'articleIconsList', 'loginSubscribeBoxout']}),
        dict(name='ul', attrs={'class': 'articleIconsList'})
    ]
    remove_tags_after = [
        dict(name='div', attrs={'id': 'articleFooter'})
    ]
    feeds = [
-        (u'News', u'http://www.techworld.com/rss/feeds/techworld-news.xml'),
+        (u'News', u'http://www.techworld.com/news/rss'),
-        (u'How-Tos', u'http://www.techworld.com/rss/feeds/techworld-how-tos.xml'),
+        (u'Tutorial', u'http://www.techworld.com/tutorial/rss'),
-        (u'Reviews', u'http://www.techworld.com/rss/feeds/techworld-reviews.xml'),
+        (u'Reviews', u'http://www.techworld.com/review/rss'),
-        (u'Features', u'http://www.techworld.com/rss/feeds/techworld-features.xml'),
+        (u'Features', u'http://www.techworld.com/features/rss'),
-        (u'Storage', u'http://www.techworld.com/rss/feeds/techworld-storage.xml'),
+        (u'Analysis', u'http://www.techworld.com/analysis/rss'),
-        (u'Applications',
+        (u'Galleries',
-         u'http://www.techworld.com/rss/feeds/techworld-applications.xml'),
+         u'http://www.techworld.com/picture-gallery/rss'),
-        (u'Virtualization',
+        (u'TechWorld Blogs',
-         u'http://www.techworld.com/rss/feeds/techworld-virtualisation.xml'),
+         u'http://www.techworld.com/blog/rss'),
        (u'Personal Tech',
         u'http://www.techworld.com/rss/feeds/techworld-personal-tech.xml'),
        (u'Green IT', u'http://www.techworld.com/rss/feeds/techworld-green-it.xml'),
        (u'Security', u'http://www.techworld.com/rss/feeds/techworld-security.xml'),
        (u'Operating Systems',
         u'http://www.techworld.com/rss/feeds/techworld-operating-systems.xml'),
        (u'Networking', u'http://www.techworld.com/rss/feeds/techworld-networking.xml'),
        (u'Mobile and Wireless',
         u'http://www.techworld.com/rss/feeds/techworld-mobile-wireless.xml'),
        (u'Data Centre', u'http://www.techworld.com/rss/feeds/techworld-data-centre.xml'),
        (u'SME', u'http://www.techworld.com/rss/feeds/techworld-sme.xml'),
        (u'TechWorld Blogs', u'http://blogs.techworld.com/atom.xml')
    ]
    extra_css = '''
                img {align:left;}
                '''
--- a/recipes/technology_review.recipe
+++ b/recipes/technology_review.recipe
@ -18,12 +18,14 @@ class TechnologyReview(BasicNewsRecipe):
    .subheadline {font: italic large}
    """
    feeds = [
-        (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
+        (u'Computing',
-        (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
+         u'http://feeds.technologyreview.com/technology_review_Computing'),
-        (u'Communications',
+        (u'Energy',
-         u'http://feeds.technologyreview.com/technology_review_Communications'),
+         u'http://feeds.technologyreview.com/technology_review_Energy'),
-        (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
+        (u'Materials',
-        (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
+         u'http://feeds.technologyreview.com/technology_review_Materials'),
-        (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
+        (u'Biomedicine',
-        (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
+         u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business',
         u'http://feeds.technologyreview.com/technology_review_Biztech')
    ]
--- a/recipes/the_budget_fashionista.recipe
+++ b/recipes/the_budget_fashionista.recipe
@ -22,20 +22,8 @@ class TheBudgetFashionista(BasicNewsRecipe):
    category = 'news, fashion, comsetics, women'
    lang = 'en-US'
    language = 'en'
    auto_cleanup = True
-    conversion_options = {
+    feeds = [(u'Articles',
-        'comment': description, 'tags': category, 'publisher': publisher, 'language': lang
+              u'http://feeds.feedburner.com/TheBudgetFashionista')
-    }
+    ]
    keep_only_tags = [dict(name='div', attrs={'class': 'columnLeft'})]
    remove_tags_after = dict(name='div', attrs={'class': 'postDetails'})
    remove_tags = [dict(name=['object', 'link', 'script',
                              'iframe', 'form', 'login-button'])]
    feeds = [(u'Articles', u'http://www.thebudgetfashionista.com/feeds/atom/')]
    def preprocess_html(self, soup):
        for it in soup.findAll('img'):
            if it.parent.name == 'a':
                it.parent.name = 'div'
        return soup
--- a/recipes/the_week_magazine_free.recipe
+++ b/recipes/the_week_magazine_free.recipe
@ -23,8 +23,5 @@ class TheWeek(BasicNewsRecipe):
    language = 'en'
    auto_cleanup = True
    feeds = [
-        (u'News-Opinion', u'http://theweek.com/section/index/news_opinion.rss'),
+        (u'Latest articles', u'http://theweek.com/rss.xml'),
        (u'Business', u'http://theweek.com/section/index/business.rss'),
        (u'Arts-Life', u'http://theweek.com/section/index/arts_life.rss'),
        (u'Cartoons', u'http://theweek.com/section/index/cartoon_wit/0/all-cartoons.rss')
    ]
--- a/recipes/usatoday.recipe
+++ b/recipes/usatoday.recipe
@ -14,7 +14,6 @@ class USAToday(BasicNewsRecipe):
    title = 'USA Today'
    __author__ = 'Kovid Goyal'
    description = 'newspaper'
    cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/USAT.jpg'
    encoding = 'utf-8'
    publisher = 'usatoday.com'
    category = 'news, usa'
@ -28,25 +27,42 @@ class USAToday(BasicNewsRecipe):
    filterDuplicates = True
    extra_css = '''
-                    h1, h2 {font-size:xx-large; font-family:Arial,Helvetica,sans-serif;}
+                    h1, h2 {
-                    #post-attributes, .info, .clear {font-size:xx-small; color:#4D4D4D; font-family:Arial,Helvetica,sans-serif;}
+                        font-size:xx-large;
-                    #post-body, #content {font-size:medium; font-family:Arial,Helvetica,sans-serif;}
+                        font-family:Arial,Helvetica,sans-serif;}
                    #post-attributes, .info,
                    .clear {
                        font-size:xx-small; color:#4D4D4D;
                        font-family:Arial,Helvetica,sans-serif;
                    }
                    #post-body,
                    #content {
                        font-size:medium;
                        font-family:Arial,Helvetica,sans-serif;
                    }
                '''
    feeds = [
-        ('Top Headlines', 'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
+        ('Top Headlines',
-        ('Tech Headlines', 'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
+         'http://rssfeeds.usatoday.com/usatoday-NewsTopStories'),
-        ('Personal Tech', 'http://rssfeeds.usatoday.com/UsatodaycomTech-PersonalTalk'),
+        ('Tech Headlines',
-        ('Science', 'http://rssfeeds.usatoday.com/TP-ScienceFair'),
+         'http://rssfeeds.usatoday.com/usatoday-TechTopStories'),
-        ('Health', 'http://rssfeeds.usatoday.com/UsatodaycomHealth-TopStories'),
+        ('Personal Tech',
-        ('Travel Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomTravel-TopStories'),
+         'http://rssfeeds.usatoday.com/UsatodaycomTech-PersonalTalk'),
-        ('Money Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomMoney-TopStories'),
+        ('Health',
         'http://rssfeeds.usatoday.com/UsatodaycomHealth-TopStories'),
        ('Travel Headlines',
         'http://rssfeeds.usatoday.com/UsatodaycomTravel-TopStories'),
        ('Money Headlines',
         'http://rssfeeds.usatoday.com/UsatodaycomMoney-TopStories'),
        ('Entertainment Headlines',
         'http://rssfeeds.usatoday.com/usatoday-LifeTopStories'),
-        ('Sport Headlines', 'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
+        ('Sport Headlines',
-        ('Weather Headlines', 'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
+         'http://rssfeeds.usatoday.com/UsatodaycomSports-TopStories'),
-        ('Most Popular', 'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
+        ('Weather Headlines',
-        ('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
+         'http://rssfeeds.usatoday.com/usatoday-WeatherTopStories'),
        ('Most Popular',
         'http://rssfeeds.usatoday.com/Usatoday-MostViewedArticles'),
    ]
    auto_cleanup = True
--- a/recipes/usnews.recipe
+++ b/recipes/usnews.recipe
@ -20,40 +20,14 @@ class LaPrensa(BasicNewsRecipe):
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'
-
+    auto_cleanup = True
    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher
    ]
    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'
    keep_only_tags = [
        dict(name='h1'), dict(name='div', attrs={'id': ['dateline']}), dict(
            name='div', attrs={'class': ['blogCredit', 'body']})
    ]
    feeds = [
-
+        (u'Homepage', u'http://www.usnews.com/rss/usnews.rss'),
-    (u'Homepage', u'http://www.usnews.com/rss/usnews.rss'),
+        (u'Health', u'http://www.usnews.com/rss/health'),
-    (u'Health', u'http://www.usnews.com/rss/health/index.rss'),
+        (u'Nation & World', u'http://www.usnews.com/rss/news'),
-    (u'Nation & World', u'http://www.usnews.com/rss/news/index.rss'),
+        (u'Money & Business', u'http://www.usnews.com/rss/money'),
-    (u'Money & Business', u'http://www.usnews.com/rss/business/index.rss'),
+        (u'Education', u'http://www.usnews.com/rss/education'),
-    (u'Education', u'http://www.usnews.com/rss/education/index.rss'),
+        (u'Opinion', u'http://www.usnews.com/rss/opinion'),
-    (u'Opinion', u'http://www.usnews.com/rss/opinion/index.rss'),
+        (u'Science', u'http://www.usnews.com/rss/science')
    (u'Science', u'http://www.usnews.com/rss/science/index.rss')
    ]
    def print_version(self, url):
        return url.replace('.html', '_print.html')
    def get_article_url(self, article):
        raw = article.get('link',  None)
        artcl, sep, unneeded = raw.rpartition('?')
        return artcl
    def preprocess_html(self, soup):
        del soup.body['onload']
        for item in soup.findAll(style=True):
            del item['style']
        return soup
--- a/recipes/waco_tribune.recipe
+++ b/recipes/waco_tribune.recipe
@ -11,10 +11,10 @@ class AdvancedUserRecipe1278773519(BasicNewsRecipe):
    max_articles_per_feed = 100
    feeds = [
-        (u'News', u'http://www.wacotrib.com/news/index.rss2'),
+        (u'News', u'http://www.wacotrib.com/search/?q=&t=article&l=100&d=&d1=&d2=&s=start_time&sd=desc&nsa=eedition&c[]=news/ap_nation,news/ap_nation/*&f=rss'),
-        (u'Sports', u'http://www.wacotrib.com/sports/index.rss2'),
+        (u'Sports', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=sports*&f=rss'),
-        (u'AccessWaco', u'http://www.wacotrib.com/accesswaco/index.rss2'),
+        (u'AccessWaco', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=entertainment/accesswaco*&f=rss'),
-        (u'Opinions', u'http://www.wacotrib.com/opinion/index.rss2')
+        (u'Opinions', u'http://www.wacotrib.com/search/?q=&t=article&l=25&d=&d1=&d2=&s=start_time&sd=desc&c[]=opinion*&f=rss')
    ]
    remove_javascript = True
@ -23,13 +23,4 @@ class AdvancedUserRecipe1278773519(BasicNewsRecipe):
    language = 'en'
    encoding = 'utf-8'
    conversion_options = {'linearize_tables': True}
-    masthead_url = 'http://media.wacotrib.com/designimages/wacotrib_logo.jpg'
+    auto_cleanup = True
    keep_only_tags = [
        dict(name='div', attrs={'class': 'twoColumn left'}),
    ]
    remove_tags = [
        dict(name='div', attrs={'class': 'right blueLinks'}),
    ]
    remove_tags_after = [
        dict(name='div', attrs={'class': 'dottedRule'}),
    ]
--- a/recipes/wash_post.recipe
+++ b/recipes/wash_post.recipe
@ -4,7 +4,6 @@ __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
 www.washingtonpost.com
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
@ -23,55 +22,18 @@ class TheWashingtonPost(BasicNewsRecipe):
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'newspaper'
-    masthead_url = 'http://www.washingtonpost.com/rw/sites/twpweb/img/logos/twp_logo_300.gif'
+    auto_cleanup = True
    cover_url = strftime(
        'http://www.washingtonpost.com/rw/WashingtonPost/Content/Epaper/%Y-%m-%d/Ax1.pdf')
    extra_css             = """
                               body{font-family: Georgia,serif }
                            """
    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    keep_only_tags = [
        dict(attrs={'id': ['content', 'entryhead', 'entrytext']})]
    remove_tags = [
        dict(name=['meta', 'link', 'iframe', 'base']), dict(
            attrs={'id': 'multimedia-leaf-page'})
    ]
    remove_attributes = ['lang', 'property', 'epochtime',
                         'datetitle', 'pagetype', 'contenttype', 'comparetime']
    feeds = [
-
+        (u'World', u'http://feeds.washingtonpost.com/rss/world'),
-    (u'World', u'http://feeds.washingtonpost.com/rss/world'),
+        (u'National', u'http://feeds.washingtonpost.com/rss/national'),
-    (u'National', u'http://feeds.washingtonpost.com/rss/national'),
+        (u'White House',
-    (u'White House', u'http://feeds.washingtonpost.com/rss/politics/whitehouse'),
+         u'http://feeds.washingtonpost.com/rss/politics/whitehouse'),
-    (u'Business', u'http://feeds.washingtonpost.com/rss/business'),
+        (u'Business', u'http://feeds.washingtonpost.com/rss/business'),
-    (u'Opinions', u'http://feeds.washingtonpost.com/rss/opinions'),
+        (u'Opinions', u'http://feeds.washingtonpost.com/rss/opinions'),
-    (u'Investigations', u'http://feeds.washingtonpost.com/rss/investigations'),
+        (u'Local', u'http://feeds.washingtonpost.com/rss/local'),
-    (u'Local', u'http://feeds.washingtonpost.com/rss/local'),
+        (u'Entertainment',
-    (u'Entertainment', u'http://feeds.washingtonpost.com/rss/entertainment'),
+         u'http://feeds.washingtonpost.com/rss/entertainment'),
-    (u'Sports', u'http://feeds.washingtonpost.com/rss/sports'),
+        (u'Sports', u'http://feeds.washingtonpost.com/rss/sports'),
-    (u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'),
+        (u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'),
    (u'Special Reports', u'http://feeds.washingtonpost.com/rss/national/special-reports')
    ]
    def print_version(self, url):
        """Map a story URL to its print page; leave other URLs untouched.

        When ``url`` contains ``_story.html`` each occurrence is replaced
        with ``_print.html``; otherwise ``url`` is returned as given.
        """
        story_marker = '_story.html'
        print_marker = '_print.html'
        return url.replace(story_marker, print_marker) if story_marker in url else url
    def get_article_url(self, article):
        """Choose the URL to download for a feed entry, or ``None`` to skip it.

        The link reported by ``BasicNewsRecipe.get_article_url`` is used
        unless ``article.id`` starts with ``http``, in which case the id is
        used instead. Links that do not contain ``washingtonpost.com`` and
        links containing a video, gallery or links page marker are logged
        and skipped by returning ``None``.
        """
        default_link = BasicNewsRecipe.get_article_url(self, article)
        entry_id = article.id
        link = entry_id if entry_id.startswith('http') else default_link

        if 'washingtonpost.com' not in link:
            self.log('Skipping ads:', link)
            return None

        non_article_markers = ('_video.html', '_gallery.html', '_links.html')
        if any(marker in link for marker in non_article_markers):
            self.log('Skipping non-article:', link)
            return None

        return link
--- a/recipes/worldcrunch.recipe
+++ b/recipes/worldcrunch.recipe
@ -14,5 +14,5 @@ class Worldcrunch(BasicNewsRecipe):
    feeds = [
        ('News',
-         'http://www.worldcrunch.com/feed'),
+         'http://www.worldcrunch.com/rss/rss.php'),
    ]