Fix Il Giornale

This commit is contained in:
Kovid Goyal 2011-11-10 07:56:01 +05:30
parent 566a8ef163
commit 5e56b3dd60

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
__license__ = 'GPL v3' __license__ = 'GPL v3'
__author__ = 'Gabriele Marini, based on Darko Miletic' __author__ = 'Gambarini, based on Darko Miletic'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
description = 'Italian daily newspaper - 19-04-2010' description = 'Italian daily newspaper - 09-11-2011'
''' '''
http://www.ilgiornale.it/ http://www.ilgiornale.it/
@ -11,7 +11,7 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class IlGiornale(BasicNewsRecipe): class IlGiornale(BasicNewsRecipe):
__author__ = 'Marini Gabriele' __author__ = 'GAMBARINI'
description = 'Italian daily newspaper' description = 'Italian daily newspaper'
cover_url = 'http://www.ilgiornale.it/img_v1/logo.gif' cover_url = 'http://www.ilgiornale.it/img_v1/logo.gif'
@ -23,9 +23,8 @@ class IlGiornale(BasicNewsRecipe):
timefmt = '[%a, %d %b, %Y]' timefmt = '[%a, %d %b, %Y]'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 50 max_articles_per_feed = 100
use_embedded_content = False use_embedded_content = False
recursion = 100
no_stylesheets = True no_stylesheets = True
conversion_options = {'linearize_tables':True} conversion_options = {'linearize_tables':True}
@ -38,11 +37,11 @@ class IlGiornale(BasicNewsRecipe):
def print_version(self, url):
    """Return the URL of the printer-friendly version of an article.

    The article page at ``url`` is downloaded and searched for the
    ``<div id="print_article">`` element; the first ``<a>`` inside it is
    taken as the link to the print view.

    :param url: URL of the regular article page.
    :return: The print-view URL, or ``url`` unchanged when the page has
             no print container, no link inside it, or the link has no
             ``href`` attribute.
    """
    raw = self.browser.open(url).read()
    soup = BeautifulSoup(raw.decode('utf8', 'replace'))
    print_container = soup.find('div', {'id': 'print_article'})
    # find() returns None when nothing matches, so the container must be
    # checked before its attributes are accessed; otherwise an article
    # without a print box raises AttributeError instead of falling back.
    if print_container is None:
        return url
    print_link = print_container.a
    if print_link is None:
        return url
    href = print_link.get('href')
    if not href:
        return url
    # The site root is prepended, so the href is presumably site-relative
    # (e.g. '/interni/...') -- NOTE(review): confirm against the live markup.
    return 'http://www.ilgiornale.it' + href
feeds = [ feeds = [