diff --git a/recipes/heritage_foundation.recipe b/recipes/heritage_foundation.recipe
index 80589d3d14..17b70dff14 100644
--- a/recipes/heritage_foundation.recipe
+++ b/recipes/heritage_foundation.recipe
@@ -1,7 +1,6 @@
from calibre.web.feeds.news import BasicNewsRecipe
class HeritageFoundation(BasicNewsRecipe):
title = u'The Heritage Foundation'
- custom_title = "The Heritage Foundation"
description = 'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—\
whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, \
individual freedom, traditional American values, and a strong national defense.'
@@ -25,7 +24,7 @@ individual freedom, traditional American values, and a strong national defense.'
remove_empty_feeds = True
auto_cleanup = True
- conversion_options = { 'title' : custom_title,
+ conversion_options = {
'comments' : description,
'tags' : tags,
'language' : language,
diff --git a/recipes/richmond_times_dispatch.recipe b/recipes/richmond_times_dispatch.recipe
index 163a6317ff..741bacbda3 100644
--- a/recipes/richmond_times_dispatch.recipe
+++ b/recipes/richmond_times_dispatch.recipe
@@ -1,59 +1,100 @@
+import re
from calibre.web.feeds.recipes import BasicNewsRecipe
-class AdvancedUserRecipe1335532466(BasicNewsRecipe):
- title = u'Richmond Times-Dispatch'
- description = 'News from Richmond, Virginia, USA'
- __author__ = 'jde'
- cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
- language = 'en'
- encoding = 'utf8'
- oldest_article = 1 #days
- max_articles_per_feed = 25
- needs_subscription = False
- remove_javascript = True
- recursions = 0
- use_embedded_content = False
- no_stylesheets = True
- auto_cleanup = True
+class RichmondTimesDispatch(BasicNewsRecipe):
+ title = u'Richmond Times-Dispatch'
+ description = "The Richmond Times-Dispatch is the primary daily newspaper in Richmond, \
+ the capital of Virginia, United States, as well as the Virginia cities of Petersburg, \
+ Chester, Hopewell, Colonial Heights, Charlottesville, Lynchburg, Waynesboro, \
+ and is also a default paper for rural regions of the state. \
+ The RTD has published in some form for more than 150 years."
+ __author__ = '_reader'
+ __date__ = '05 July 2012'
+ __version__ = '1.4'
+ cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
+ masthead_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
+ language = 'en'
+ oldest_article = 1.5 #days
+ max_articles_per_feed = 100
+ needs_subscription = False
+ publisher = 'timesdispatch.com'
+ category = 'news, commentary'
+ tags = 'news'
+ publication_type = 'newspaper'
+ no_stylesheets = True
+ use_embedded_content= False
+ encoding = None
+ simultaneous_downloads = 20
+ recursions = 0
+ remove_javascript = True
+ remove_empty_feeds = True
+ auto_cleanup = False
+
+ conversion_options = {
+ 'comments' : description,
+ 'tags' : tags,
+ 'language' : language,
+ 'publisher' : publisher,
+ 'authors' : publisher,
+ 'smarten_punctuation' : True
+ }
+
+ remove_tags_before = dict(id='hnews hentry item')
+
+ remove_tags_after = dict(name='hr')
+
+ remove_tags = [
+ dict(name='div', attrs={'id':['mg_hd', 'mg_ft', 'sr_b', 'comments_left', 'comments_right']})
+ ,dict(name='div', attrs={'class':['bottom_social','article_bottom']})
+ ,dict(name='table', attrs={'class':['ap-mediabox-table', 'ap-htmltable-table', 'ap-photogallery-table', 'ap-htmlfragment-table']})
+ ]
+
+
+ preprocess_regexps = [
+ (re.compile(r'
', re.DOTALL|re.IGNORECASE), lambda match: ''),
+ (re.compile(r'\s*http://www2.timesdispatch.*?
', re.DOTALL|re.IGNORECASE), lambda match: ''),
+ (re.compile(r'\s*
', re.DOTALL|re.IGNORECASE), lambda match: ''),
+ (re.compile(r'
', re.DOTALL|re.IGNORECASE), lambda match: ''), #strip
line break
+ (re.compile(r'.', re.DOTALL|re.IGNORECASE), lambda match: ''), #strip
line break
+ (re.compile(r'\s*Richmond Times-Dispatch.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), #strip
line break
+ ]
+
feeds = [
+ ('News', 'http://www2.timesdispatch.com/list/feed/rss/news-archive'),
+ ('Breaking News', 'http://www2.timesdispatch.com/list/feed/rss/breaking-news'),
+ ('National News', 'http://www2.timesdispatch.com/list/feed/rss/national-news'),
+ ('Local News', 'http://www2.timesdispatch.com/list/feed/rss/local-news'),
+ ('Business', 'http://www2.timesdispatch.com/list/feed/rss/business'),
+ ('Local Business', 'http://www2.timesdispatch.com/list/feed/rss/local-business'),
+ ('Politics', 'http://www2.timesdispatch.com/list/feed/rss/politics'),
+ ('Virginia Politics', 'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
+ ('Sports', 'http://www2.timesdispatch.com/list/feed/rss/sports2'),
+ ('Health', 'http://www2.timesdispatch.com/feed/rss/lifestyles/health_med_fit/'),
+ ('Entertainment/Life', 'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
+ ('Arts/Theatre', 'http://www2.timesdispatch.com/feed/rss/entertainment/arts_theatre/'),
+ ('Movies', 'http://www2.timesdispatch.com/list/feed/rss/movies'),
+ ('Music', 'http://www2.timesdispatch.com/list/feed/rss/music'),
+ ('Dining & Food', 'http://www2.timesdispatch.com/list/feed/rss/dining'),
+ ('Home & Garden', 'http://www2.timesdispatch.com/list/feed/rss/home-and-garden/'),
+ #inactive('Travel', 'http://www2.timesdispatch.com/feed/rss/travel/'),
+ ('Opinion', 'http://www2.timesdispatch.com/feed/rss/news/opinion/'),
+ ('Editorials', 'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
+ ('Columnists and Blogs', 'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
+ ('Opinion Columnists', 'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
+ ('Letters to the Editor', 'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
+ ('Traffic', 'http://www2.timesdispatch.com/list/feed/rss/traffic'),
+ ]
-('News',
-'http://www2.timesdispatch.com/list/feed/rss/news-archive'),
-('Breaking News',
-'http://www2.timesdispatch.com/list/feed/rss/breaking-news'),
-('National News',
-'http://www2.timesdispatch.com/list/feed/rss/national-news'),
-('Local News',
-'http://www2.timesdispatch.com/list/feed/rss/local-news'),
-('Business',
-'http://www2.timesdispatch.com/list/feed/rss/business'),
-('Local Business',
-'http://www2.timesdispatch.com/list/feed/rss/local-business'),
-('Politics',
-'http://www2.timesdispatch.com/list/feed/rss/politics'),
-('Virginia Politics',
-'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
-('Editorials',
-'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
-('Columnists and Blogs',
-'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
-('Opinion Columnists',
-'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
-('Letters to the Editor',
-'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
-('Traffic',
-'http://www2.timesdispatch.com/list/feed/rss/traffic'),
-('Sports',
-'http://www2.timesdispatch.com/list/feed/rss/sports2'),
-('Entertainment/Life',
-'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
-('Movies',
-'http://www2.timesdispatch.com/list/feed/rss/movies'),
-('Music',
-'http://www2.timesdispatch.com/list/feed/rss/music'),
-('Dining & Food',
-'http://www2.timesdispatch.com/list/feed/rss/dining'),
-
- ]
-
+ def print_version(self, url):
+     """Return the print-view URL for ``url``, or ``url`` itself when it carries no article number."""
+     # Article URLs ending in '-<4 to 10 digits>/' expose the number used by the print page.
+     # Anything else (e.g. AP articles) has no print URL, so the link is returned unchanged.
+     match = re.search(r'-([0-9]{4,10})/$', url)
+     if match is None:
+         return url
+     # Testing the match directly avoids gluing a non-matching, non-'http' URL onto the prefix.
+     print_prefix = 'http://www2.timesdispatch.com/member-center/share-this/print/?content=ar'
+     article_num = match.group(1)
+     return print_prefix + article_num