mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Updated Richmond Times-Dispatch
This commit is contained in:
parent
ecd2d49ab6
commit
8934634e22
@ -1,7 +1,6 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
class HeritageFoundation(BasicNewsRecipe):
|
class HeritageFoundation(BasicNewsRecipe):
|
||||||
title = u'The Heritage Foundation'
|
title = u'The Heritage Foundation'
|
||||||
custom_title = "The Heritage Foundation"
|
|
||||||
description = 'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—\
|
description = 'Founded in 1973, The Heritage Foundation is a research and educational institution—a think tank—\
|
||||||
whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, \
|
whose mission is to formulate and promote conservative public policies based on the principles of free enterprise, limited government, \
|
||||||
individual freedom, traditional American values, and a strong national defense.'
|
individual freedom, traditional American values, and a strong national defense.'
|
||||||
@ -25,7 +24,7 @@ individual freedom, traditional American values, and a strong national defense.'
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
|
||||||
conversion_options = { 'title' : custom_title,
|
conversion_options = {
|
||||||
'comments' : description,
|
'comments' : description,
|
||||||
'tags' : tags,
|
'tags' : tags,
|
||||||
'language' : language,
|
'language' : language,
|
||||||
|
@ -1,59 +1,100 @@
|
|||||||
|
import re
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
class AdvancedUserRecipe1335532466(BasicNewsRecipe):
|
class RichmondTimesDispatch(BasicNewsRecipe):
|
||||||
title = u'Richmond Times-Dispatch'
|
title = u'Richmond Times-Dispatch'
|
||||||
description = 'News from Richmond, Virginia, USA'
|
description = "The Richmond Times-Dispatch is the primary daily newspaper in Richmond, \
|
||||||
__author__ = 'jde'
|
the capital of Virginia, United States, as well as the Virginia cities of Petersburg, \
|
||||||
cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
|
Chester. Hopewell, Colonial Heights, Charlottesville, Lynchburg, Waynesboro, \
|
||||||
language = 'en'
|
and is also a default paper for rural regions of the state. \
|
||||||
encoding = 'utf8'
|
The RTD has published in some form for more than 150 years."
|
||||||
oldest_article = 1 #days
|
__author__ = '_reader'
|
||||||
max_articles_per_feed = 25
|
__date__ = '05 July 2012'
|
||||||
needs_subscription = False
|
__version__ = '1.4'
|
||||||
remove_javascript = True
|
cover_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
|
||||||
recursions = 0
|
masthead_url = 'http://static2.dukecms.com/va_tn/timesdispatch_com/site-media/img/icons/logo252x97.png'
|
||||||
use_embedded_content = False
|
language = 'en'
|
||||||
no_stylesheets = True
|
oldest_article = 1.5 #days
|
||||||
auto_cleanup = True
|
max_articles_per_feed = 100
|
||||||
|
needs_subscription = False
|
||||||
|
publisher = 'timesdispatch.com'
|
||||||
|
category = 'news, commentary'
|
||||||
|
tags = 'news'
|
||||||
|
publication_type = 'newspaper'
|
||||||
|
no_stylesheets = True
|
||||||
|
use_embedded_content= False
|
||||||
|
encoding = None
|
||||||
|
simultaneous_downloads = 20
|
||||||
|
recursions = 0
|
||||||
|
remove_javascript = True
|
||||||
|
remove_empty_feeds = True
|
||||||
|
auto_cleanup = False
|
||||||
|
|
||||||
|
conversion_options = {
|
||||||
|
'comments' : description,
|
||||||
|
'tags' : tags,
|
||||||
|
'language' : language,
|
||||||
|
'publisher' : publisher,
|
||||||
|
'authors' : publisher,
|
||||||
|
'smarten_punctuation' : True
|
||||||
|
}
|
||||||
|
|
||||||
|
remove_tags_before = dict(id='hnews hentry item')
|
||||||
|
|
||||||
|
remove_tags_after = dict(name='hr')
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'id':['mg_hd', 'mg_ft', 'sr_b', 'comments_left', 'comments_right']})
|
||||||
|
,dict(name='div', attrs={'class':['bottom_social','article_bottom']})
|
||||||
|
,dict(name='table', attrs={'class':['ap-mediabox-table', 'ap-htmltable-table', 'ap-photogallery-table', 'ap-htmlfragment-table']})
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile(r'<table class="ap-story-table hnews hentry item".*?<td class="ap-story-td">', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||||
|
(re.compile(r'<p>\s*http://www2.timesdispatch.*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||||
|
(re.compile(r'<p>\s*<img src="http://static2.dukecms.*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||||
|
(re.compile(r'<p>\s*<a href="http://www2.timesdispatch.*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||||
|
(re.compile(r'<hr.*?>', re.DOTALL|re.IGNORECASE), lambda match: ''), #strip <hr /> line break
|
||||||
|
(re.compile(r'<a\s*rel="item-license.*?Use</a>.', re.DOTALL|re.IGNORECASE), lambda match: ''), #strip the rel="item-license" link
|
||||||
|
(re.compile(r'<small>\s*Richmond Times-Dispatch.*?</small>', re.DOTALL|re.IGNORECASE), lambda match: ''), #strip the <small> Richmond Times-Dispatch notice
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
('News', 'http://www2.timesdispatch.com/list/feed/rss/news-archive'),
|
||||||
|
('Breaking News', 'http://www2.timesdispatch.com/list/feed/rss/breaking-news'),
|
||||||
|
('National News', 'http://www2.timesdispatch.com/list/feed/rss/national-news'),
|
||||||
|
('Local News', 'http://www2.timesdispatch.com/list/feed/rss/local-news'),
|
||||||
|
('Business', 'http://www2.timesdispatch.com/list/feed/rss/business'),
|
||||||
|
('Local Business', 'http://www2.timesdispatch.com/list/feed/rss/local-business'),
|
||||||
|
('Politics', 'http://www2.timesdispatch.com/list/feed/rss/politics'),
|
||||||
|
('Virginia Politics', 'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
|
||||||
|
('Sports', 'http://www2.timesdispatch.com/list/feed/rss/sports2'),
|
||||||
|
('Health', 'http://www2.timesdispatch.com/feed/rss/lifestyles/health_med_fit/'),
|
||||||
|
('Entertainment/Life', 'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
|
||||||
|
('Arts/Theatre', 'http://www2.timesdispatch.com/feed/rss/entertainment/arts_theatre/'),
|
||||||
|
('Movies', 'http://www2.timesdispatch.com/list/feed/rss/movies'),
|
||||||
|
('Music', 'http://www2.timesdispatch.com/list/feed/rss/music'),
|
||||||
|
('Dining & Food', 'http://www2.timesdispatch.com/list/feed/rss/dining'),
|
||||||
|
('Home & Garden', 'http://www2.timesdispatch.com/list/feed/rss/home-and-garden/'),
|
||||||
|
#inactive('Travel', 'http://www2.timesdispatch.com/feed/rss/travel/'),
|
||||||
|
('Opinion', 'http://www2.timesdispatch.com/feed/rss/news/opinion/'),
|
||||||
|
('Editorials', 'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
|
||||||
|
('Columnists and Blogs', 'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
|
||||||
|
('Opinion Columnists', 'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
|
||||||
|
('Letters to the Editor', 'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
|
||||||
|
('Traffic', 'http://www2.timesdispatch.com/list/feed/rss/traffic'),
|
||||||
|
]
|
||||||
|
|
||||||
('News',
|
def print_version(self,url):
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/news-archive'),
|
article_num = re.sub(r'(^.*)\-([0-9]{4,10})\/$', r'\g<2>', url)
|
||||||
('Breaking News',
|
ap_pat = re.compile('http')
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/breaking-news'),
|
#print '\nDEBUG>>>>>>>>: article_num: ', article_num
|
||||||
('National News',
|
#print 'DEBUG>>>>>>>>: ap_pat.search(article_num): ', ap_pat.search(article_num)
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/national-news'),
|
if ap_pat.search(article_num): #AP article, no print url
|
||||||
('Local News',
|
#print 'DEBUG>>>>>>>>: AP URL: ', url
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/local-news'),
|
return url
|
||||||
('Business',
|
else:
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/business'),
|
printURL = 'http://www2.timesdispatch.com/member-center/share-this/print/?content=ar' + article_num
|
||||||
('Local Business',
|
return printURL
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/local-business'),
|
|
||||||
('Politics',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/politics'),
|
|
||||||
('Virginia Politics',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/virginia-politics'),
|
|
||||||
('Editorials',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/editorial-desk'),
|
|
||||||
('Columnists and Blogs',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/news-columnists-blogs'),
|
|
||||||
('Opinion Columnists',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/opinion-editorial-columnists'),
|
|
||||||
('Letters to the Editor',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/opinion-letters'),
|
|
||||||
('Traffic',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/traffic'),
|
|
||||||
('Sports',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/sports2'),
|
|
||||||
('Entertainment/Life',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/entertainment'),
|
|
||||||
('Movies',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/movies'),
|
|
||||||
('Music',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/music'),
|
|
||||||
('Dining & Food',
|
|
||||||
'http://www2.timesdispatch.com/list/feed/rss/dining'),
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user