mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update Irish Times. Fixes #1159553 (Updated news recipe for the Irish Times)
This commit is contained in:
parent
8535e21694
commit
3eacc9cadb
@@ -1,65 +1,62 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
|
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns, 2013 Tom Scholl"
|
||||||
'''
|
'''
|
||||||
irishtimes.com
|
irishtimes.com
|
||||||
'''
|
'''
|
||||||
import re
|
import urlparse, re
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
class IrishTimes(BasicNewsRecipe):
|
class IrishTimes(BasicNewsRecipe):
|
||||||
title = u'The Irish Times'
|
title = u'The Irish Times'
|
||||||
encoding = 'ISO-8859-15'
|
__author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns, Tom Scholl"
|
||||||
__author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
|
|
||||||
language = 'en_IE'
|
language = 'en_IE'
|
||||||
timefmt = ' (%A, %B %d, %Y)'
|
|
||||||
|
|
||||||
|
masthead_url = 'http://www.irishtimes.com/assets/images/generic/website/logo_theirishtimes.png'
|
||||||
|
|
||||||
|
encoding = 'utf-8'
|
||||||
oldest_article = 1.0
|
oldest_article = 1.0
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
|
remove_empty_feeds = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
simultaneous_downloads= 5
|
temp_files = []
|
||||||
|
articles_are_obfuscated = True
|
||||||
r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':'footer'})]
|
|
||||||
extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }'
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
|
('News', 'http://www.irishtimes.com/cmlink/the-irish-times-news-1.1319192'),
|
||||||
('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
|
('World', 'http://www.irishtimes.com/cmlink/irishtimesworldfeed-1.1321046'),
|
||||||
('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
|
('Politics', 'http://www.irishtimes.com/cmlink/irish-times-politics-rss-1.1315953'),
|
||||||
('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
|
('Business', 'http://www.irishtimes.com/cmlink/the-irish-times-business-1.1319195'),
|
||||||
('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
|
('Culture', 'http://www.irishtimes.com/cmlink/the-irish-times-culture-1.1319213'),
|
||||||
('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
|
('Sport', 'http://www.irishtimes.com/cmlink/the-irish-times-sport-1.1319194'),
|
||||||
('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
|
('Debate', 'http://www.irishtimes.com/cmlink/debate-1.1319211'),
|
||||||
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
|
('Life & Style', 'http://www.irishtimes.com/cmlink/the-irish-times-life-style-1.1319214'),
|
||||||
('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
|
|
||||||
('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
|
|
||||||
('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
|
|
||||||
('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
|
|
||||||
('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
|
|
||||||
('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
|
|
||||||
('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
|
|
||||||
('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
|
|
||||||
('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
|
|
||||||
('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
|
|
||||||
('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
|
|
||||||
('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def print_version(self, url):
|
def get_obfuscated_article(self, url):
|
||||||
if url.count('rss.feedsportal.com'):
|
# Insert a pic from the original url, but use content from the print url
|
||||||
#u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
|
pic = None
|
||||||
u = url.find('irishtimes')
|
pics = self.index_to_soup(url)
|
||||||
u = 'http://www.irishtimes.com' + url[u + 12:]
|
div = pics.find('div', {'class' : re.compile('image-carousel')})
|
||||||
u = u.replace('0C', '/')
|
if div:
|
||||||
u = u.replace('A', '')
|
pic = div.img
|
||||||
u = u.replace('0Bhtml/story01.htm', '_pf.html')
|
if pic:
|
||||||
else:
|
try:
|
||||||
u = url.replace('.html','_pf.html')
|
pic['src'] = urlparse.urljoin(url, pic['src'])
|
||||||
return u
|
pic.extract()
|
||||||
|
except:
|
||||||
|
pic = None
|
||||||
|
|
||||||
|
content = self.index_to_soup(url + '?mode=print&ot=example.AjaxPageLayout.ot')
|
||||||
|
if pic:
|
||||||
|
content.p.insert(0, pic)
|
||||||
|
|
||||||
|
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
|
||||||
|
self.temp_files[-1].write(content.prettify())
|
||||||
|
self.temp_files[-1].close()
|
||||||
|
return self.temp_files[-1].name
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
return article.link
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user