mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
NYTimes Global by Krittika Goyal
This commit is contained in:
parent
89ce33ebc9
commit
0e31649305
@ -1,63 +1,30 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Derry FitzGerald'
|
||||
'''
|
||||
iht.com
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class NYTimesGlobal(BasicNewsRecipe):
|
||||
title = u'NY Times Global'
|
||||
language = 'en'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 1 #days
|
||||
max_articles_per_feed = 25
|
||||
use_embedded_content = False
|
||||
|
||||
class InternationalHeraldTribune(BasicNewsRecipe):
|
||||
title = u'The International Herald Tribune'
|
||||
__author__ = 'Derry FitzGerald'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 30
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
|
||||
dict(name=['form'])]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<!-- webtrends.*', re.DOTALL),
|
||||
lambda m:'</body></html>')
|
||||
]
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||
|
||||
remove_empty_feeds = True
|
||||
|
||||
feeds = [
|
||||
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
|
||||
(u'Business', u'http://www.iht.com/rss/business.xml'),
|
||||
(u'Americas', u'http://www.iht.com/rss/america.xml'),
|
||||
(u'Europe', u'http://www.iht.com/rss/europe.xml'),
|
||||
(u'Asia', u'http://www.iht.com/rss/asia.xml'),
|
||||
(u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
|
||||
(u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
|
||||
(u'Technology', u'http://www.iht.com/rss/technology.xml'),
|
||||
(u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
|
||||
(u'Sports', u'http://www.iht.com/rss/sports.xml'),
|
||||
(u'Culture', u'http://www.iht.com/rss/arts.xml'),
|
||||
(u'Style and Design', u'http://www.iht.com/rss/style.xml'),
|
||||
(u'Travel', u'http://www.iht.com/rss/travel.xml'),
|
||||
(u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
|
||||
(u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
|
||||
(u'Properties', u'http://www.iht.com/rss/properties.xml')
|
||||
]
|
||||
temp_files = []
|
||||
articles_are_obfuscated = True
|
||||
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
br = self.get_browser()
|
||||
br.open(url)
|
||||
response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
|
||||
html = response1.read()
|
||||
|
||||
self.temp_files.append(PersistentTemporaryFile('_iht.html'))
|
||||
self.temp_files[-1].write(html)
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
('NYTimes',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
|
||||
('NYTimes global',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
|
||||
('World',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
|
||||
('U.S.',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
|
||||
('Business',
|
||||
'http://feeds.nytimes.com/nyt/rss/Business'),
|
||||
('Sports',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
|
||||
('Technology',
|
||||
'http://feeds.nytimes.com/nyt/rss/Technology'),
|
||||
]
|
||||
|
@ -21,7 +21,7 @@ NS = 'http://calibre-ebook.com/recipe_collection'
|
||||
E = ElementMaker(namespace=NS, nsmap={None:NS})
|
||||
|
||||
def iterate_over_builtin_recipe_files():
|
||||
exclude = ['craigslist', 'iht', 'toronto_sun',
|
||||
exclude = ['craigslist', 'toronto_sun',
|
||||
'livemint']
|
||||
d = os.path.dirname
|
||||
base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')
|
||||
|
Loading…
x
Reference in New Issue
Block a user