'
-
-from calibre.web.feeds.news import BasicNewsRecipe
-
-
-class TheResurgence(BasicNewsRecipe):
-    title = u'The Resurgence'
-    __author__ = 'Peter Grungi'
-    language = 'en'
-
-    oldest_article = 7
-    max_articles_per_feed = 10
-    auto_cleanup = True
-    cover_url = 'http://cdn.theresurgence.com/images/logo.png'
-    masthead_url = 'http://cdn.theresurgence.com/images/logo.png'
-    language = 'en'
-    publisher = 'The Resurgence'
-    author = 'The Resurgence'
-
-    feeds = [
-        (u'The Resurgence', u'http://feeds.theresurgence.com/TheResurgence?format=xml')]
diff --git a/recipes/security_watch.recipe b/recipes/security_watch.recipe
index 4780f549c4..13c17f10b1 100644
--- a/recipes/security_watch.recipe
+++ b/recipes/security_watch.recipe
@@ -10,30 +10,10 @@ class SecurityWatch(BasicNewsRecipe):
     oldest_article = 14
     max_articles_per_feed = 100
     use_embedded_content = False
-    filter_regexps = [r'feedads\.googleadservices\.com']
-    filter_regexps = [r'ad\.doubleclick']
-    filter_regexps = [r'advert']
     language = 'en'
-
-    extra_css = 'div {text-align:left}'
-
-    remove_tags = [dict(id='topBannerContainer'),
-                   dict(id='topBannerSmall'),
-                   dict(id='topSearchBar'),
-                   dict(id='topSearchForm'),
-                   dict(id='rtBannerMPU'),
-                   dict(id='topNavBar'),
-                   dict(id='breadcrumbs'),
-                   # dict(id='entry-28272'),
-                   dict(id='topSearchLinks'),
-                   dict(name='span', attrs={'class': 'date'})]
-
-    remove_tags_after = [dict(id='googlemp')]
+    auto_cleanup = True

     feeds = [
-        (u'securitywatch', u'http://feeds.ziffdavisenterprise.com/RSS/security_watch/')]
-
-    def postprocess_html(self, soup, first_fetch):
-        for t in soup.findAll(['table', 'tr', 'td']):
-            t.name = 'div'
-        return soup
+        (u'securitywatch',
+         u'http://feeds.pcmag.com/Rss.aspx/SectionArticles?sectionId=28026')
+    ]
diff --git a/recipes/sign_on_sd.recipe b/recipes/sign_on_sd.recipe
index df942142b2..aefe92b71b 100644
--- a/recipes/sign_on_sd.recipe
+++ b/recipes/sign_on_sd.recipe
@@ -17,39 +17,38 @@ class AdvancedUserRecipe1315899507(BasicNewsRecipe):
     auto_cleanup = True
     remove_empty_feeds = True
     publication_type = 'newspaper'
-    masthead_url = 'http://media.signonsandiego.com/e2/sosd/images/sosd_logo.png'

     feeds = [
-        (u'Latest News', u'http://www.signonsandiego.com/rss/headlines/'),
-        (u'Local News', u'http://www.signonsandiego.com/rss/headlines/metro/'),
-        (u'Business', u'http://www.signonsandiego.com/rss/headlines/business/'),
-        (u'Politics', u'http://www.signonsandiego.com/rss/headlines/local/politics/'),
-        (u'Border & Immigration', u'http://www.signonsandiego.com/rss/headlines/border/'),
-        (u'Courts', u'http://www.signonsandiego.com/rss/headlines/courts/'),
-        (u'Education', u'http://www.signonsandiego.com/news/education/'),
-        (u'Sports', u'http://www.signonsandiego.com/rss/headlines/sports/'),
-        (u'Chargers', u'http://www.signonsandiego.com/rss/headlines/sports/chargers/'),
-        (u'Padres', u'http://www.signonsandiego.com/rss/headlines/sports/padres/'),
-        (u'NFL', u'http://www.signonsandiego.com/rss/headlines/sports/nfl/'),
-        (u'NBA', u'http://www.signonsandiego.com/rss/headlines/sports/nba/'),
-        (u'Nick Canepa', u'http://www.signonsandiego.com/rss/authors/nick-canepa/'),
-        (u'Tim Sullivan', u'http://www.signonsandiego.com/rss/authors/tim-sullivan/'),
-        (u'Ruben Navarrette', u'http://www.signonsandiego.com/rss/authors/ruben-navarrette/'),
-        (u'Diane Bell', u'http://www.signonsandiego.com/rss/authors/diane-bell/'),
-        (u'Smart Living', u'http://www.signonsandiego.com/rss/headlines/smart-living/'),
-        (u'Photos', u'http://www.signonsandiego.com/rss/photos/'),
-        (u'Arts', u'http://www.signonsandiego.com/rss/headlines/night-and-day/theater-arts/'),
-        (u'Books', u'http://www.signonsandiego.com/rss/headlines/lifestyle/books/'),
-        (u'Currents-Passages',
-         u'http://www.signonsandiego.com/rss/headlines/lifestyle/currents/passages/'),
-        (u'Currents-Weekend',
-         u'http://www.signonsandiego.com/news/rss2/daily/currentsweekend.xml'),
-        (u'Dialog', u'http://www.signonsandiego.com/news/rss2/daily/dialog.xml'),
-        (u'Home', u'http://www.signonsandiego.com/rss/headlines/home/'),
-        (u'Homescape', u'http://www.signonsandiego.com/rss/headlines/lifestyle/homescape/'),
-        (u'Night & Day', u'http://www.signonsandiego.com/news/rss2/daily/nightday.xml'),
-        (u'Opinion', u'http://www.signonsandiego.com/rss/headlines/opinion/'),
-        (u'Quest', u'http://www.signonsandiego.com/news/rss2/daily/quest.xml'),
-        (u'Travel', u'http://www.signonsandiego.com/news/rss2/daily/travel.xml'),
-        (u'Wheels', u'http://www.signonsandiego.com/news/rss2/daily/wheels.xml')
+        (u'Latest News',
+         u'http://www.sandiegouniontribune.com/latest/rss2.0.xml'),
+        (u'Business',
+         u'http://www.sandiegouniontribune.com/business/rss2.0.xml'),
+        (u'Politics',
+         u'http://www.sandiegouniontribune.com/news/politics/rss2.0.xml'),
+        (u'Immigration',
+         u'http://www.sandiegouniontribune.com/news/immigration/rss2.0.xml'),
+        (u'Courts',
+         u'http://www.sandiegouniontribune.com/news/public-safety/rss2.0.xml'),
+        (u'Education',
+         u'http://www.sandiegouniontribune.com/news/education/rss2.0.xml'),
+        (u'Sports',
+         u'http://www.sandiegouniontribune.com/sports/rss2.0.xml'),
+        (u'Chargers',
+         u'http://www.sandiegouniontribune.com/sports/chargers/rss2.0.xml'),
+        (u'Padres',
+         u'http://www.sandiegouniontribune.com/sports/padres/rss2.0.xml'),
+        (u'NFL',
+         u'http://www.sandiegouniontribune.com/sports/nfl/rss2.0.xml'),
+        (u'NBA',
+         u'http://www.sandiegouniontribune.com/sports/nba/rss2.0.xml'),
+        (u'Photos',
+         u'http://www.sandiegouniontribune.com/visuals/rss2.0.xml'),
+        (u'Entertainment',
+         u'http://www.sandiegouniontribune.com/entertainment/rss2.0.xml'),
+        (u'Books',
+         u'http://www.sandiegouniontribune.com/entertainment/books/rss2.0.xml'),
+        (u'Opinion',
+         u'http://www.sandiegouniontribune.com/opinion/rss2.0.xml'),
+        (u'Travel',
+         u'http://www.sandiegouniontribune.com/lifestyle/travel/rss2.0.xml'),
     ]
diff --git a/recipes/staradvertiser.recipe b/recipes/staradvertiser.recipe
index 91e285d8e6..936b247448 100644
--- a/recipes/staradvertiser.recipe
+++ b/recipes/staradvertiser.recipe
@@ -28,5 +28,5 @@ class Starbulletin(BasicNewsRecipe):
         (u'Business', u'http://www.staradvertiser.com/business/feed/'),
         (u'Sports', u'http://www.staradvertiser.com/sports/feed/'),
         (u'Features',
-         u'http://www.staradvertiser.com/featurespremium/index.rss')
+         u'http://www.staradvertiser.com/features/feed/')
     ]
diff --git a/recipes/television_without_pity.recipe b/recipes/television_without_pity.recipe
deleted file mode 100644
index 66c96aa77a..0000000000
--- a/recipes/television_without_pity.recipe
+++ /dev/null
@@ -1,97 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
-
-class TelevisionWithoutPity(BasicNewsRecipe):
-    title = u'Television Without Pity'
-    language = 'en'
-    __author__ = 'Snarkastica'
-    # Used for pulling down an entire show, not just the RSS feed
-    SHOW = 'http://www.televisionwithoutpity.com/show/SHOW-NAME-HERE/recaps/'
-    oldest_article = 7  # days
-    max_articles_per_feed = 25
-    # reverse_article_order=True # Useful for entire show, to display in episode order
-    use_embedded_content = False
-
-    preprocess_regexps = [(re.compile(r'')]
-
-    keep_only_tags = [dict(name='span', attrs={'class': 'headline_recap_title'}), dict(
-        name='p', attrs={'class': 'byline'}), dict(name='div', attrs={'class': 'body_recap'}), dict(name='h1')]
-    no_stylesheets = True
-
-    # Comment this out and configure process_index() to retrieve a single show
-    feeds = [
-        ('Ltest Recaps',
-         'http://www.televisionwithoutpity.com/rss.xml'),
-    ]
-
-    '''
-    This method can be used to grab all recaps for a single show
-    Set the SHOW constant at the beginning of this file to the URL for a show's recap page
-    (the page listing all recaps, usually of the form:
-    http://www.televisionwithoutpity.com/show/SHOW-NAME/recaps/"
-    Where SHOW-NAME is the hyphenated name of the show.
-
-    To use:
-    1. Comment out feeds = [...] earlier in this file
-    2. Set the SHOW constant to the show's recap page
-    3. Uncomment the following function
-    '''

-    '''
-    def parse_index(self):
-        soup = self.index_to_soup(self.SHOW)
-        feeds = []
-        articles = []
-        showTitle = soup.find('h1').string
-        recaps = soup.find('table')
-        for ep in recaps.findAll('tr'):
-            epData = ep.findAll('td')
-            epNum = epData[0].find(text=True).strip()
-            if not epNum == "Ep.":
-                epT = self.tag_to_string(epData[1].find('em')).strip()
-                epST = " (or " + self.tag_to_string(epData[1].find('h3')).strip() + ")"
-                epTitle = epNum + ": " + epT + epST
-                epData[1].find('em').extract()
-                epURL = epData[1].find('a', href=True)
-                epURL = epURL['href']
-                epSum = self.tag_to_string(epData[1].find('p')).strip()
-                epDate = epData[2].find(text=True).strip()
-                epAuthor = self.tag_to_string(epData[4].find('p')).strip()
-                articles.append({'title':epTitle, 'url':epURL, 'description':epSum, 'date':epDate, 'author':epAuthor})
-        feeds.append((showTitle, articles))
-        #self.abort_recipe_processing("test")
-        return feeds
-    '''
-
-    # This will add subsequent pages of multipage recaps to a single article
-    # page
-    def append_page(self, soup, appendtag, position):
-        # If false, will still grab single-page recaplets
-        if (soup.find('p', attrs={'class': 'pages'})):
-            pager = soup.find('p', attrs={'class': 'pages'}).find(text='Next')
-            if pager:
-                nexturl = pager.parent['href']
-                soup2 = self.index_to_soup(nexturl)
-                texttag = soup2.find('div', attrs={'class': 'body_recap'})
-                for it in texttag.findAll(style=True):
-                    del it['style']
-                newpos = len(texttag.contents)
-                self.append_page(soup2, texttag, newpos)
-                texttag.extract()
-                appendtag.insert(position, texttag)
-
-    def preprocess_html(self, soup):
-        self.append_page(soup, soup.body, 3)
-        return soup
-
-    # Remove the multi page links (we had to keep these in for append_page(), but they can go away now
-    # Could have used CSS to hide, but some readers ignore CSS.
-    def postprocess_html(self, soup, first_fetch):
-        paginator = soup.findAll('p', attrs={'class': 'pages'})
-        if paginator:
-            for p in paginator:
-                p.extract()
-
-        # TODO: Fix this so it converts the headline class into a heading 1
-        return soup