Fix #3972 (The Irish Times feeds have changed)

This commit is contained in:
Kovid Goyal 2009-11-09 09:05:04 -07:00
parent 2f43bc64ea
commit 569dbeb2b9
2 changed files with 53 additions and 51 deletions

View File

@ -6,10 +6,7 @@ __docformat__ = 'restructuredtext en'
''' '''
www.guardian.co.uk www.guardian.co.uk
''' '''
import string
import re
from calibre import strftime from calibre import strftime
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Guardian(BasicNewsRecipe): class Guardian(BasicNewsRecipe):

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Derry FitzGerald. 2009 Modified by Ray Kinsella' __copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan"
''' '''
irishtimes.com irishtimes.com
''' '''
@ -9,13 +9,16 @@ from calibre.web.feeds.news import BasicNewsRecipe
class IrishTimes(BasicNewsRecipe): class IrishTimes(BasicNewsRecipe):
title = u'The Irish Times' title = u'The Irish Times'
__author__ = 'Derry FitzGerald and Ray Kinsella' __author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
language = 'en' language = 'en'
timefmt = ' (%A, %B %e, %Y)'
oldest_article = 3
no_stylesheets = True no_stylesheets = True
simultaneous_downloads= 1 simultaneous_downloads= 1
r = re.compile('.*(?P<url>http:\/\/www.irishtimes.com\/.*\.html).*') r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
remove_tags = [dict(name='div', attrs={'class':'footer'})] remove_tags = [dict(name='div', attrs={'class':'footer'})]
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }' extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
@ -32,9 +35,11 @@ class IrishTimes(BasicNewsRecipe):
def print_version(self, url): def print_version(self, url):
return url.replace('.html', '_pf.html') if url.count('rss.feedsportal.com'):
u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
else:
u = url.replace('.html','_pf.html')
return u
def get_article_url(self, article): def get_article_url(self, article):
m = self.r.match(article.get('description', None)) return article.link
print m.group('url')
return m.group('url')