NYTimes Global by Krittika Goyal

This commit is contained in:
Kovid Goyal 2011-12-27 18:13:54 +05:30
parent 89ce33ebc9
commit 0e31649305
2 changed files with 24 additions and 57 deletions

View File

@ -1,63 +1,30 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Derry FitzGerald'
'''
iht.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class NYTimesGlobal(BasicNewsRecipe):
title = u'NY Times Global'
language = 'en'
__author__ = 'Krittika Goyal'
oldest_article = 1 #days
max_articles_per_feed = 25
use_embedded_content = False
class InternationalHeraldTribune(BasicNewsRecipe):
title = u'The International Herald Tribune'
__author__ = 'Derry FitzGerald'
language = 'en'
oldest_article = 1
max_articles_per_feed = 30
no_stylesheets = True
auto_cleanup = True
remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
dict(name=['form'])]
preprocess_regexps = [
(re.compile(r'<!-- webtrends.*', re.DOTALL),
lambda m:'</body></html>')
]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
remove_empty_feeds = True
feeds = [
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
(u'Business', u'http://www.iht.com/rss/business.xml'),
(u'Americas', u'http://www.iht.com/rss/america.xml'),
(u'Europe', u'http://www.iht.com/rss/europe.xml'),
(u'Asia', u'http://www.iht.com/rss/asia.xml'),
(u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
(u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
(u'Technology', u'http://www.iht.com/rss/technology.xml'),
(u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
(u'Sports', u'http://www.iht.com/rss/sports.xml'),
(u'Culture', u'http://www.iht.com/rss/arts.xml'),
(u'Style and Design', u'http://www.iht.com/rss/style.xml'),
(u'Travel', u'http://www.iht.com/rss/travel.xml'),
(u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
(u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
(u'Properties', u'http://www.iht.com/rss/properties.xml')
]
temp_files = []
articles_are_obfuscated = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
def get_obfuscated_article(self, url):
br = self.get_browser()
br.open(url)
response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
html = response1.read()
self.temp_files.append(PersistentTemporaryFile('_iht.html'))
self.temp_files[-1].write(html)
self.temp_files[-1].close()
return self.temp_files[-1].name
('NYTimes',
'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
('NYTimes global',
'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
('World',
'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
('U.S.',
'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
('Business',
'http://feeds.nytimes.com/nyt/rss/Business'),
('Sports',
'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
('Technology',
'http://feeds.nytimes.com/nyt/rss/Technology'),
]

View File

@ -21,7 +21,7 @@ NS = 'http://calibre-ebook.com/recipe_collection'
E = ElementMaker(namespace=NS, nsmap={None:NS})
def iterate_over_builtin_recipe_files():
exclude = ['craigslist', 'iht', 'toronto_sun',
exclude = ['craigslist', 'toronto_sun',
'livemint']
d = os.path.dirname
base = os.path.join(d(d(d(d(d(d(os.path.abspath(__file__))))))), 'recipes')