Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-07 10:14:46 -04:00

Commit 96947fd01a: Merge branch 'master' of https://github.com/CoderAllan/calibre
@@ -18,35 +18,6 @@ class EdgeConversationRSS(BasicNewsRecipe):
    oldest_article = 60
    max_articles_per_feed = 100
    no_stylesheets = True
    auto_cleanup = True

    keep_only_tags = [
        dict(name='div', attrs={'class': 'HomeLeftPannel IMGCTRL'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'Logo'})
    ]

    feeds = [(u'Edge RSS', u'http://edge.org/feeds/')]

    def print_version(self, url):
        return url.replace('conversation/', 'conversation.php?cid=')

    def parse_feeds(self):

        # Call parent's method.
        feeds = BasicNewsRecipe.parse_feeds(self)

        # Loop through all feeds.
        for feed in feeds:

            # Loop through all articles in feed.
            for article in feed.articles[:]:

                # Remove anything that is not a conversation, and remove PDF
                # files as well...

                if not ('CONVERSATION' in article.title):
                    feed.articles.remove(article)
                elif 'pdf' in article.url:
                    feed.articles.remove(article)

        return feeds

    feeds = [(u'Edge RSS', u'http://edge.org/feed')]
@@ -1,53 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe


class EuropeanVoice(BasicNewsRecipe):
    title = u'European Voice'
    __author__ = 'malfi'
    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
    language = 'en'
    keep_only_tags = [dict(name='div', attrs={'id': 'articleLeftColumn'})]
    remove_tags = [dict(name='div', attrs={'id': 'BreadCrump'})]
    feeds = [
        (u'Whole site ', u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis', u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment', u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports', u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People', u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career', u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies', u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents', u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics', u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business', u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade', u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society',
         u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy', u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport', u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change', u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment', u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food', u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society', u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice', u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs', u'http://www.europeanvoice.com/Rss/27.xml')
    ]
    extra_css = '''
    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        return url + '?bPrint=1'

    def preprocess_html(self, soup):
        denied = soup.findAll(True, text='Subscribers')
        if denied:
            raise Exception(
                'Article skipped, because content can only be seen with subscription')
        return soup
@@ -15,29 +15,17 @@ class Kitsapsun(BasicNewsRecipe):
    publisher = 'Scripps Interactive Newspapers Group'
    category = 'news, Kitsap county, USA'
    language = 'en'
    oldest_article = 2
    max_articles_per_feed = 100
    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    auto_cleanup = True

    conversion_options = {
        'comments': description, 'tags': category, 'language': language, 'publisher': publisher
    }

    keep_only_tags = [
        dict(name='div', attrs={'id': ['story_meta', 'story_content']})]

    remove_tags = [dict(name=['object', 'link', 'embed', 'form', 'iframe'])]

    feeds = [

        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/')
    ]

    def print_version(self, url):
        return url.rpartition('/')[0] + '/?print=1'
    feeds = [(u'News', u'http://www.kitsapsun.com/feeds/rss/news'),
             (u'Sports', u'http://www.kitsapsun.com/feeds/rss/sports'),
             (u'Entertainment',
              u'http://www.kitsapsun.com/feeds/rss/entertainment'),
             (u'Lifestyles', u'http://www.kitsapsun.com/feeds/rss/lifestyle'),
             (u'Opinion', u'http://www.kitsapsun.com/feeds/rss/opinion'),
             ]
@@ -14,5 +14,5 @@ class HindustanTimes(BasicNewsRecipe):

    feeds = [
        ('News',
         'http://www.mobilenations.com/rss/mb.xml'),
         'http://www.mobilenations.com/about?format=RSS'),
    ]
@@ -1,8 +1,4 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.BeautifulSoup import Tag


class AdvancedUserRecipe1306061239(BasicNewsRecipe):
@@ -31,383 +27,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
    simultaneous_downloads = 20
    use_embedded_content = False
    recursions = 0

    conversion_options = {
        'comment': description, 'tags': category, 'publisher': publisher, 'language': language
    }
    auto_cleanup = True

    feeds = [
        (u'NME News', u'http://www.nme.com/rss/news'),
        (u'Reviews', u'http://www.nme.com/rss/reviews'),
        (u'Blogs', u'http://www.nme.com/rss/blogs'),
        (u'NME News', u'http://www.nme.com/news/feed'),
        (u'Reviews', u'http://www.nme.com/reviews/feed/'),
        (u'Blogs', u'http://www.nme.com/blogs/feed'),
    ]

    keep_only_tags = [
        dict(name='div', attrs={'id': 'content'}),
    ]

    remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                         'valign', 'vspace', 'hspace', 'alt', 'width', 'height']

    remove_tags = [
        dict(name='meta'),
        dict(name='span', attrs={'class': 'article_info'}),
        dict(name='div', attrs={'class': 'breadcrumbs'}),
        dict(name='div', attrs={'class': 'mugshot'}),
        dict(name='div', attrs={'class': 'header'}),
        dict(name='div', attrs={'class': re.compile(
            'youtube.*', re.IGNORECASE)}),
        dict(name='div', attrs={'class': re.compile(
            'socialbuttons.*', re.IGNORECASE)}),
        dict(name='div', attrs={'class': 'clear_both'}),
        dict(name='div', attrs={'class': re.compile(
            'headline.*', re.IGNORECASE)}),
        dict(name='div', attrs={'class': 'member-signedout'}),
        dict(name='div', attrs={'class': re.compile(
            'prev_next.*', re.IGNORECASE)}),
        dict(name='div', attrs={'class': re.compile(
            'article_related.*', re.IGNORECASE)}),
        dict(name='div', attrs={'class': re.compile(
            'feature_bar.*', re.IGNORECASE)}),
        dict(name='div', attrs={'class': re.compile('ebay.*', re.IGNORECASE)}),
        dict(name='div', attrs={'id': re.compile(
            'morenews.*', re.IGNORECASE)}),
        dict(name='div', attrs={'id': re.compile(
            'ticketspopup.*', re.IGNORECASE)}),
        dict(name='div', attrs={'id': re.compile(
            'ratemy_logprompt.*', re.IGNORECASE)}),
        dict(name='div', attrs={'id': re.compile(
            'related_artist.*', re.IGNORECASE)}),
        dict(name='img', attrs={'class': re.compile(
            'video_play_large.*', re.IGNORECASE)}),
        dict(name='ul', attrs={'class': re.compile(
            'prev_next.*', re.IGNORECASE)}),
        dict(name='ul', attrs={'class': re.compile(
            'nme_store.*', re.IGNORECASE)}),
        dict(name='p', attrs={'class': re.compile('top', re.IGNORECASE)}),
        dict(name='table', attrs={
            'class': re.compile('tickets.*', re.IGNORECASE)}),
    ]

    masthead_url = 'http://default.media.ipcdigital.co.uk/300/000001014/e1ab_orh100000w300/NME-logo.jpg'

    def get_cover_url(self):
        magazine_page_raw = self.index_to_soup(
            'http://www.nme.com/magazine', raw=True)
        magazine_page_raw = re.sub(
            r'<script\b.+?</script>', '', magazine_page_raw, flags=re.DOTALL | re.IGNORECASE)
        magazine_page_raw = re.sub(
            r'\!\[if ', '!--[if ', magazine_page_raw, flags=re.DOTALL | re.IGNORECASE)
        magazine_page = self.index_to_soup(magazine_page_raw)
        cov = magazine_page.find('img', attrs={'class': 'magcover'})

        cov2 = str(cov['src'])

        br = browser()
        br.set_handle_redirect(False)
        try:
            br.open_novisit(cov2)
            cover_url = str(cov2)
        except:
            cover_url = 'http://default.media.ipcdigital.co.uk/300/000001014/e1ab_orh100000w300/NME-logo.jpg'
        return cover_url

    def preprocess_raw_html(self, raw_html, url):
        '''
        Need this for a bug on site that prevents blogg post being parsed correctly
        '''
        raw_html = re.sub(r'\!\[if ', '!--[if ', raw_html,
                          flags=re.DOTALL | re.IGNORECASE)

        return raw_html

    def preprocess_html(self, soup):
        youtube_regex = re.compile(
            r'(?:youtube\.com/(?:[^/]+/.+/|(?:v|e(?:mbed)?)/|.*[?&]v=)|youtu\.be/)(?P<id>[^"&?/ ]{11})', re.DOTALL | re.IGNORECASE)
        instagram_regex = re.compile(
            r'.*?instagram.*?', re.DOTALL | re.IGNORECASE)
        twitter_regex = re.compile(r'.*?twitter.*?', re.DOTALL | re.IGNORECASE)
        visualise_regex = re.compile(
            r'.*?visualise.*?', re.DOTALL | re.IGNORECASE)
        soundcloud_regex = re.compile(
            r'(?P<url>.*?(w|api)\.soundcloud.*?com\/(tracks|playlists)\/\d{8,9})', re.DOTALL | re.IGNORECASE)
        dailymotion_regex = re.compile(
            r'.*?dailymotion.*?', re.DOTALL | re.IGNORECASE)
        spotify_regex = re.compile(r'.*?spotify.*?', re.DOTALL | re.IGNORECASE)
        vine_regex = re.compile(r'.*?vine.*?', re.DOTALL | re.IGNORECASE)
        doubleHtmlEntities = re.compile(
            ur'(&)(?P<e>[\d\w\#]*;)', re.DOTALL | re.IGNORECASE | re.UNICODE)
        for iframe in soup.findAll('iframe'):
            if iframe.has_key('src') and youtube_regex.search(iframe['src']) is not None:  # noqa
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ YouTube ]')
                pq.insert(1, br)
                m = youtube_regex.search(iframe['src'])
                if m.group('id') is not None:
                    imgTag = Tag(soup, 'img', [
                        ('src', 'http://img.youtube.com/vi/' + m.group('id') + '/0.jpg')])
                    pq.insert(len(pq.contents), imgTag)
                pq.insert(len(pq.contents), iframe['src'])
                iframe.replaceWith(pq)
            elif iframe.has_key('src') and soundcloud_regex.search(iframe['src']) is not None:  # noqa
                m = soundcloud_regex.search(iframe['src'])
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ SoundCloud ]')
                pq.insert(1, br)
                pq.insert(2, m.group('url'))
                iframe.replaceWith(pq)
            elif iframe.has_key('src') and dailymotion_regex.search(iframe['src']) is not None:  # noqa
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ DailyMotion ]')
                pq.insert(1, br)
                imgUrl = self.get_dailymotion_pic(iframe['src'])
                if imgUrl is not None:
                    imgTag = Tag(soup, 'img', [('src', imgUrl)])
                    pq.insert(len(pq.contents), imgTag)
                pq.insert(len(pq.contents), iframe['src'])
                iframe.replaceWith(pq)
            elif iframe.has_key('src') and spotify_regex.search(iframe['src']) is not None:  # noqa
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ Spotify ]')
                pq.insert(1, br)
                imgUrl = self.get_spotify_pic(iframe['src'])
                if imgUrl is not None:
                    imgTag = Tag(soup, 'img', [('src', imgUrl)])
                    pq.insert(len(pq.contents), imgTag)
                pq.insert(len(pq.contents), iframe['src'])
                iframe.replaceWith(pq)
            elif iframe.has_key('src') and vine_regex.search(iframe['src']) is not None:  # noqa
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ Vine ]')
                pq.insert(1, br)
                imgUrl = self.get_vine_pic(iframe['src'])
                if imgUrl is not None:
                    imgTag = Tag(soup, 'img', [('src', imgUrl)])
                    pq.insert(len(pq.contents), imgTag)
                pq.insert(len(pq.contents), iframe['src'])
                iframe.replaceWith(pq)
            elif iframe.has_key('src') and visualise_regex.search(iframe['src']) is not None:  # noqa
                imgUrl = self.get_visualise_pic(iframe['src'])
                if imgUrl is not None:
                    imgTag = Tag(soup, 'img', [('src', imgUrl)])
                    iframe.replaceWith(imgTag)
        for blockquote in soup.findAll('blockquote'):
            if blockquote.has_key('class') and twitter_regex.search(blockquote['class']) is not None:  # noqa
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ Twitter ]')
                pq.insert(len(pq.contents), br)
                match = re.search(
                    "(?P<url>pic\.twitter[^\s<]+)", str(blockquote))
                if match is not None:
                    img = self.get_twitter_pic(str(match.group("url")))
                    if img is not None:
                        pq.insert(len(pq.contents), img)
                for p in blockquote.findAll(name='p'):
                    x = 0
                    plen = len(p.contents)
                    while True:
                        c = len(pq.contents)
                        if p.contents[x].string is not None:
                            pq.insert(c, re.sub(doubleHtmlEntities, lambda m: '&' + m.group(
                                2), str(p.contents[x].string), re.IGNORECASE | re.UNICODE))
                        else:
                            pq.insert(c, p.contents[x].content)
                        x += 1
                        if x == plen:
                            break
                    br = Tag(soup, 'br')
                    pq.insert(len(pq.contents), br)
                    p.extract()
                if len(blockquote.contents) > 0:
                    x = 0
                    xlen = len(blockquote.contents)
                    while True:
                        c = len(pq.contents)
                        if blockquote.contents[x].string is not None:
                            pq.insert(c, re.sub(doubleHtmlEntities, lambda m: '&' + m.group(
                                2), str(blockquote.contents[x].string), re.IGNORECASE | re.UNICODE))
                        else:
                            pq.insert(c, blockquote.contents[x].content)
                        x += 1
                        if x == xlen:
                            break
                blockquote.replaceWith(pq)
            elif blockquote.has_key('class') and instagram_regex.search(blockquote['class']) is not None:  # noqa
                pq = Tag(soup, 'blockquote')
                br = Tag(soup, 'br')
                pq.insert(0, '[ Instagram ]')
                pq.insert(1, br)
                a = blockquote.find(name='a', attrs={'href': instagram_regex})
                imgUrl = None
                if a is not None:
                    imgUrl = self.get_instagram_pic(str(a['href']))
                if imgUrl is not None:
                    img = Tag(soup, 'img', [('src', imgUrl)])
                    pq.insert(len(pq.contents), img)
                for p in blockquote.findAll(name='p'):
                    x = 0
                    plen = len(p.contents)
                    while x < plen:
                        c = len(pq.contents)
                        if p.contents[x].string is not None:
                            pq.insert(c, re.sub(doubleHtmlEntities, lambda m: '&' + m.group(
                                2), str(p.contents[x].string), re.IGNORECASE | re.UNICODE))
                        # else:
                        # pq.insert(c, p.contents[x].content)
                        x += 1
                    br = Tag(soup, 'br')
                    c = len(pq.contents)
                    pq.insert(c, br)
                blockquote.replaceWith(pq)
        for alink in soup.findAll('a'):
            if alink.string is not None:
                tstr = alink.string
                alink.replaceWith(tstr)
            elif alink.img is not None:
                tstr = alink.img
                alink.replaceWith(tstr)
            elif alink.span is not None:
                tstr = alink.span
                alink.replaceWith(tstr)
        return soup

    def get_visualise_pic(self, url):
        returnValue = None
        try:
            raw = self.browser.open(url).read()
        except:
            print '404: ' + url
            return returnValue
        bs = BeautifulSoup(raw)
        imgRaw = bs.find(name='meta', attrs={'property': 'og:image'})
        if imgRaw is not None:
            returnValue = str(imgRaw['content'])
        return returnValue

    def get_twitter_pic(self, url):
        returnValue = None
        try:
            raw = self.browser.open('https://' + url).read()
        except:
            print '404: ' + url
            return returnValue
        bs = BeautifulSoup(raw)
        refresh = bs.find('meta', {'http-equiv': 'refresh'})
        if refresh is not None:
            content = refresh.get('content').partition('=')[2]
            try:
                raw = self.browser.open(content).read()
            except:
                print '404: ' + url
                return returnValue
            bs = BeautifulSoup(raw)
            img = bs.find(name='img', attrs={
                'alt': re.compile('.*permalink.*', re.IGNORECASE)})
            if img is not None:
                returnValue = img
        return returnValue

    def get_soundcloud_pic(self, url):
        # content loaded via javascript and require an login and/or registered application identification
        # returnValue = None
        # raw = self.browser.open(soundcloudUrl + '&visual=true').read()
        # bs = BeautifulSoup(raw)
        # imgRaw = bs.find(name='div', attrs={'style':re.compile(r'backgroud-image:*?',re.IGNORECASE)})
        # if imgRaw is not None:
        # returnValue = str(imgRaw['style'])
        return None  # returnValue

    def get_instagram_pic(self, url):
        returnValue = None
        try:
            raw = self.browser.open(url).read()
        except:
            print '404: ' + url
            return returnValue
        m = re.search('\"display_src\":\"(?P<url>http[s]?:.*?)\"', str(raw))
        if m is not None:
            returnValue = re.sub(r'\\', '', m.group(
                "url"), flags=re.DOTALL | re.IGNORECASE)
        return returnValue

    def get_dailymotion_pic(self, url):
        returnValue = None
        try:
            raw = self.browser.open(url).read()
        except:
            print '404: ' + url
            return returnValue
        m = re.search('("thumbnail_url\"\:\")(?P<url>http.*?)(\")', str(raw))
        if m is not None:
            returnValue = re.sub(r'\\', '', m.group(
                "url"), flags=re.DOTALL | re.IGNORECASE)
        return returnValue

    def get_spotify_pic(self, url):
        returnValue = None
        try:
            raw = self.browser.open(url).read()
        except:
            print '404: ' + url
            return returnValue
        m = re.search('data-ca=\"(?P<url>.*?)\"', str(raw))
        if m is not None:
            returnValue = m.group("url")
        return returnValue

    def get_vine_pic(self, url):
        returnValue = None
        try:
            raw = self.browser.open(url).read()
        except:
            print '404: ' + url
            return returnValue
        m = re.search('"thumbnail.*?src=\"(?P<url>.*?)\"', str(raw))
        if m is not None:
            returnValue = m.group("url")
        return returnValue

    preprocess_regexps = [
        (re.compile(r'<script\b.+?</script>', re.DOTALL | re.IGNORECASE), lambda h1: ''),
        (re.compile(r'<a.* id="buy-tickets-button".*</a>', re.IGNORECASE), lambda h2: ''),
        (re.compile(r'<a.* class="gallery.*</a>', re.IGNORECASE), lambda h2: ''),
    ]

    extra_css = '''
        h1 h2 {
            font-family:Arial,Helvetica,sans-serif;
            font-weight:bold;font-size:large;
        }
        h3 {
            font-family:Arial,Helvetica,sans-serif;
            font-weight:normal;
            font-size:small;
            font-style:italic;
            display:inline;
        }
        body {
            font-family:Helvetica,Arial,sans-serif;
            font-size:small;
        }
        blockquote {
            font-family:"Courier New",
            Courier, monospace;
            font-size:90%;
        }
        img {
            display:block;
        }
        .date{
            font-style:italic;
            font-weight:normal;
        }
        .article_header>p:not(.date){
            font-weight:bold;
        }
    '''
@@ -1,22 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Peter Grungi <p dot grungi at gmail dot com>'

from calibre.web.feeds.news import BasicNewsRecipe


class TheResurgence(BasicNewsRecipe):
    title = u'The Resurgence'
    __author__ = 'Peter Grungi'
    language = 'en'

    oldest_article = 7
    max_articles_per_feed = 10
    auto_cleanup = True
    cover_url = 'http://cdn.theresurgence.com/images/logo.png'
    masthead_url = 'http://cdn.theresurgence.com/images/logo.png'
    language = 'en'
    publisher = 'The Resurgence'
    author = 'The Resurgence'

    feeds = [
        (u'The Resurgence', u'http://feeds.theresurgence.com/TheResurgence?format=xml')]
@@ -10,30 +10,10 @@ class SecurityWatch(BasicNewsRecipe):
    oldest_article = 14
    max_articles_per_feed = 100
    use_embedded_content = False
    filter_regexps = [r'feedads\.googleadservices\.com']
    filter_regexps = [r'ad\.doubleclick']
    filter_regexps = [r'advert']
    language = 'en'

    extra_css = 'div {text-align:left}'

    remove_tags = [dict(id='topBannerContainer'),
                   dict(id='topBannerSmall'),
                   dict(id='topSearchBar'),
                   dict(id='topSearchForm'),
                   dict(id='rtBannerMPU'),
                   dict(id='topNavBar'),
                   dict(id='breadcrumbs'),
                   # dict(id='entry-28272'),
                   dict(id='topSearchLinks'),
                   dict(name='span', attrs={'class': 'date'})]

    remove_tags_after = [dict(id='googlemp')]
    auto_cleanup = True

    feeds = [
        (u'securitywatch', u'http://feeds.ziffdavisenterprise.com/RSS/security_watch/')]

    def postprocess_html(self, soup, first_fetch):
        for t in soup.findAll(['table', 'tr', 'td']):
            t.name = 'div'
        return soup
        (u'securitywatch',
         u'http://feeds.pcmag.com/Rss.aspx/SectionArticles?sectionId=28026')
    ]
@@ -17,39 +17,38 @@ class AdvancedUserRecipe1315899507(BasicNewsRecipe):
    auto_cleanup = True
    remove_empty_feeds = True
    publication_type = 'newspaper'
    masthead_url = 'http://media.signonsandiego.com/e2/sosd/images/sosd_logo.png'

    feeds = [
        (u'Latest News', u'http://www.signonsandiego.com/rss/headlines/'),
        (u'Local News', u'http://www.signonsandiego.com/rss/headlines/metro/'),
        (u'Business', u'http://www.signonsandiego.com/rss/headlines/business/'),
        (u'Politics', u'http://www.signonsandiego.com/rss/headlines/local/politics/'),
        (u'Border & Immigration', u'http://www.signonsandiego.com/rss/headlines/border/'),
        (u'Courts', u'http://www.signonsandiego.com/rss/headlines/courts/'),
        (u'Education', u'http://www.signonsandiego.com/news/education/'),
        (u'Sports', u'http://www.signonsandiego.com/rss/headlines/sports/'),
        (u'Chargers', u'http://www.signonsandiego.com/rss/headlines/sports/chargers/'),
        (u'Padres', u'http://www.signonsandiego.com/rss/headlines/sports/padres/'),
        (u'NFL', u'http://www.signonsandiego.com/rss/headlines/sports/nfl/'),
        (u'NBA', u'http://www.signonsandiego.com/rss/headlines/sports/nba/'),
        (u'Nick Canepa', u'http://www.signonsandiego.com/rss/authors/nick-canepa/'),
        (u'Tim Sullivan', u'http://www.signonsandiego.com/rss/authors/tim-sullivan/'),
        (u'Ruben Navarrette', u'http://www.signonsandiego.com/rss/authors/ruben-navarrette/'),
        (u'Diane Bell', u'http://www.signonsandiego.com/rss/authors/diane-bell/'),
        (u'Smart Living', u'http://www.signonsandiego.com/rss/headlines/smart-living/'),
        (u'Photos', u'http://www.signonsandiego.com/rss/photos/'),
        (u'Arts', u'http://www.signonsandiego.com/rss/headlines/night-and-day/theater-arts/'),
        (u'Books', u'http://www.signonsandiego.com/rss/headlines/lifestyle/books/'),
        (u'Currents-Passages',
         u'http://www.signonsandiego.com/rss/headlines/lifestyle/currents/passages/'),
        (u'Currents-Weekend',
         u'http://www.signonsandiego.com/news/rss2/daily/currentsweekend.xml'),
        (u'Dialog', u'http://www.signonsandiego.com/news/rss2/daily/dialog.xml'),
        (u'Home', u'http://www.signonsandiego.com/rss/headlines/home/'),
        (u'Homescape', u'http://www.signonsandiego.com/rss/headlines/lifestyle/homescape/'),
        (u'Night & Day', u'http://www.signonsandiego.com/news/rss2/daily/nightday.xml'),
        (u'Opinion', u'http://www.signonsandiego.com/rss/headlines/opinion/'),
        (u'Quest', u'http://www.signonsandiego.com/news/rss2/daily/quest.xml'),
        (u'Travel', u'http://www.signonsandiego.com/news/rss2/daily/travel.xml'),
        (u'Wheels', u'http://www.signonsandiego.com/news/rss2/daily/wheels.xml')
        (u'Latest News',
         u'http://www.sandiegouniontribune.com/latest/rss2.0.xml'),
        (u'Business',
         u'http://www.sandiegouniontribune.com/business/rss2.0.xml'),
        (u'Politics',
         u'http://www.sandiegouniontribune.com/news/politics/rss2.0.xml'),
        (u'Immigration',
         u'http://www.sandiegouniontribune.com/news/immigration/rss2.0.xml'),
        (u'Courts',
         u'http://www.sandiegouniontribune.com/news/public-safety/rss2.0.xml'),
        (u'Education',
         u'http://www.sandiegouniontribune.com/news/education/rss2.0.xml'),
        (u'Sports',
         u'http://www.sandiegouniontribune.com/sports/rss2.0.xml'),
        (u'Chargers',
         u'http://www.sandiegouniontribune.com/sports/chargers/rss2.0.xml'),
        (u'Padres',
         u'http://www.sandiegouniontribune.com/sports/padres/rss2.0.xml'),
        (u'NFL',
         u'http://www.sandiegouniontribune.com/sports/nfl/rss2.0.xml'),
        (u'NBA',
         u'http://www.sandiegouniontribune.com/sports/nba/rss2.0.xml'),
        (u'Photos',
         u'http://www.sandiegouniontribune.com/visuals/rss2.0.xml'),
        (u'Entertainment',
         u'http://www.sandiegouniontribune.com/entertainment/rss2.0.xml'),
        (u'Books',
         u'http://www.sandiegouniontribune.com/entertainment/books/rss2.0.xml'),
        (u'Opinion',
         u'http://www.sandiegouniontribune.com/opinion/rss2.0.xml'),
        (u'Travel',
         u'http://www.sandiegouniontribune.com/lifestyle/travel/rss2.0.xml'),
    ]
@@ -28,5 +28,5 @@ class Starbulletin(BasicNewsRecipe):
        (u'Business', u'http://www.staradvertiser.com/business/feed/'),
        (u'Sports', u'http://www.staradvertiser.com/sports/feed/'),
        (u'Features',
         u'http://www.staradvertiser.com/featurespremium/index.rss')
         u'http://www.staradvertiser.com/features/feed/')
    ]
@@ -1,97 +0,0 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re


class TelevisionWithoutPity(BasicNewsRecipe):
    title = u'Television Without Pity'
    language = 'en'
    __author__ = 'Snarkastica'
    # Used for pulling down an entire show, not just the RSS feed
    SHOW = 'http://www.televisionwithoutpity.com/show/SHOW-NAME-HERE/recaps/'
    oldest_article = 7  # days
    max_articles_per_feed = 25
    # reverse_article_order=True # Useful for entire show, to display in episode order
    use_embedded_content = False

    preprocess_regexps = [(re.compile(r'<span class="headline_recap_title .*?>',
                          re.DOTALL | re.IGNORECASE), lambda match: '<span class="headline_recap_title">')]
    keep_only_tags = [dict(name='span', attrs={'class': 'headline_recap_title'}), dict(
        name='p', attrs={'class': 'byline'}), dict(name='div', attrs={'class': 'body_recap'}), dict(name='h1')]
    no_stylesheets = True

    # Comment this out and configure process_index() to retrieve a single show
    feeds = [
        ('Ltest Recaps',
         'http://www.televisionwithoutpity.com/rss.xml'),
    ]

    '''
    This method can be used to grab all recaps for a single show
    Set the SHOW constant at the beginning of this file to the URL for a show's recap page
    (the page listing all recaps, usually of the form:
    http://www.televisionwithoutpity.com/show/SHOW-NAME/recaps/"
    Where SHOW-NAME is the hyphenated name of the show.

    To use:
    1. Comment out feeds = [...] earlier in this file
    2. Set the SHOW constant to the show's recap page
    3. Uncomment the following function
    '''

    '''
    def parse_index(self):
        soup = self.index_to_soup(self.SHOW)
        feeds = []
        articles = []
        showTitle = soup.find('h1').string
        recaps = soup.find('table')
        for ep in recaps.findAll('tr'):
            epData = ep.findAll('td')
            epNum = epData[0].find(text=True).strip()
            if not epNum == "Ep.":
                epT = self.tag_to_string(epData[1].find('em')).strip()
                epST = " (or " + self.tag_to_string(epData[1].find('h3')).strip() + ")"
                epTitle = epNum + ": " + epT + epST
                epData[1].find('em').extract()
                epURL = epData[1].find('a', href=True)
                epURL = epURL['href']
                epSum = self.tag_to_string(epData[1].find('p')).strip()
                epDate = epData[2].find(text=True).strip()
                epAuthor = self.tag_to_string(epData[4].find('p')).strip()
                articles.append({'title':epTitle, 'url':epURL, 'description':epSum, 'date':epDate, 'author':epAuthor})
        feeds.append((showTitle, articles))
        #self.abort_recipe_processing("test")
        return feeds
    '''

    # This will add subsequent pages of multipage recaps to a single article
    # page
    def append_page(self, soup, appendtag, position):
        # If false, will still grab single-page recaplets
        if (soup.find('p', attrs={'class': 'pages'})):
            pager = soup.find('p', attrs={'class': 'pages'}).find(text='Next')
            if pager:
                nexturl = pager.parent['href']
                soup2 = self.index_to_soup(nexturl)
                texttag = soup2.find('div', attrs={'class': 'body_recap'})
                for it in texttag.findAll(style=True):
                    del it['style']
                newpos = len(texttag.contents)
                self.append_page(soup2, texttag, newpos)
                texttag.extract()
                appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        self.append_page(soup, soup.body, 3)
        return soup

    # Remove the multi page links (we had to keep these in for append_page(), but they can go away now
    # Could have used CSS to hide, but some readers ignore CSS.
    def postprocess_html(self, soup, first_fetch):
        paginator = soup.findAll('p', attrs={'class': 'pages'})
        if paginator:
            for p in paginator:
                p.extract()

        # TODO: Fix this so it converts the headline class into a heading 1
        return soup