mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
64 lines
2.6 KiB
Plaintext
64 lines
2.6 KiB
Plaintext
__license__ = 'GPL v3'
|
|
__copyright__ = '2008, Derry FitzGerald'
|
|
'''
|
|
iht.com
|
|
'''
|
|
import re
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
from calibre.ptempfile import PersistentTemporaryFile
|
|
|
|
|
|
class InternationalHeraldTribune(BasicNewsRecipe):
|
|
title = u'The International Herald Tribune'
|
|
__author__ = 'Derry FitzGerald'
|
|
language = 'en'
|
|
|
|
oldest_article = 1
|
|
max_articles_per_feed = 30
|
|
no_stylesheets = True
|
|
|
|
remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
|
|
dict(name=['form'])]
|
|
preprocess_regexps = [
|
|
(re.compile(r'<!-- webtrends.*', re.DOTALL),
|
|
lambda m:'</body></html>')
|
|
]
|
|
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
|
|
|
remove_empty_feeds = True
|
|
|
|
feeds = [
|
|
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
|
|
(u'Business', u'http://www.iht.com/rss/business.xml'),
|
|
(u'Americas', u'http://www.iht.com/rss/america.xml'),
|
|
(u'Europe', u'http://www.iht.com/rss/europe.xml'),
|
|
(u'Asia', u'http://www.iht.com/rss/asia.xml'),
|
|
(u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
|
|
(u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
|
|
(u'Technology', u'http://www.iht.com/rss/technology.xml'),
|
|
(u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
|
|
(u'Sports', u'http://www.iht.com/rss/sports.xml'),
|
|
(u'Culture', u'http://www.iht.com/rss/arts.xml'),
|
|
(u'Style and Design', u'http://www.iht.com/rss/style.xml'),
|
|
(u'Travel', u'http://www.iht.com/rss/travel.xml'),
|
|
(u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
|
|
(u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
|
|
(u'Properties', u'http://www.iht.com/rss/properties.xml')
|
|
]
|
|
temp_files = []
|
|
articles_are_obfuscated = True
|
|
|
|
masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
|
|
|
|
def get_obfuscated_article(self, url):
|
|
br = self.get_browser()
|
|
br.open(url)
|
|
response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
|
|
html = response1.read()
|
|
|
|
self.temp_files.append(PersistentTemporaryFile('_iht.html'))
|
|
self.temp_files[-1].write(html)
|
|
self.temp_files[-1].close()
|
|
return self.temp_files[-1].name
|