calibre/recipes/irish_times.recipe

63 lines
2.4 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns, 2013 Tom Scholl"
'''
irishtimes.com
'''
import urlparse, re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class IrishTimes(BasicNewsRecipe):
title = u'The Irish Times'
__author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns, Tom Scholl"
language = 'en_IE'
masthead_url = 'http://www.irishtimes.com/assets/images/generic/website/logo_theirishtimes.png'
encoding = 'utf-8'
oldest_article = 1.0
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
temp_files = []
articles_are_obfuscated = True
feeds = [
('News', 'http://www.irishtimes.com/cmlink/the-irish-times-news-1.1319192'),
('World', 'http://www.irishtimes.com/cmlink/irishtimesworldfeed-1.1321046'),
('Politics', 'http://www.irishtimes.com/cmlink/irish-times-politics-rss-1.1315953'),
('Business', 'http://www.irishtimes.com/cmlink/the-irish-times-business-1.1319195'),
('Culture', 'http://www.irishtimes.com/cmlink/the-irish-times-culture-1.1319213'),
('Sport', 'http://www.irishtimes.com/cmlink/the-irish-times-sport-1.1319194'),
('Debate', 'http://www.irishtimes.com/cmlink/debate-1.1319211'),
('Life & Style', 'http://www.irishtimes.com/cmlink/the-irish-times-life-style-1.1319214'),
]
def get_obfuscated_article(self, url):
# Insert a pic from the original url, but use content from the print url
pic = None
pics = self.index_to_soup(url)
div = pics.find('div', {'class' : re.compile('image-carousel')})
if div:
pic = div.img
if pic:
try:
pic['src'] = urlparse.urljoin(url, pic['src'])
pic.extract()
except:
pic = None
content = self.index_to_soup(url + '?mode=print&ot=example.AjaxPageLayout.ot')
if pic:
content.p.insert(0, pic)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write(content.prettify())
self.temp_files[-1].close()
return self.temp_files[-1].name