mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 10:37:00 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			93 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			93 lines
		
	
	
		
			3.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | |
| from __future__ import print_function
 | |
| 
 | |
| __license__   = 'GPL v3'
 | |
| __author__ = 'GabrieleMarini, based on Darko Miletic'
 | |
| __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>, Gabriele Marini'
 | |
| __version__ = 'v1.02 Marini Gabriele '
 | |
| __date__ = '10, January 2010'
 | |
| __description__ = 'Italian daily newspaper'
 | |
| 
 | |
| '''
 | |
| http://www.auto.it/
 | |
| '''
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| 
 | |
| 
 | |
class AutoPR(BasicNewsRecipe):
    """Calibre recipe for the road tests ("Prove su Strada") of auto.it.

    The recipe reads one RSS feed and turns every ``<item>`` in it into a
    section of its own. Each section holds the item's main page plus the
    sub-pages (design, interior, technical data, ...) whose URLs are derived
    from the main page URL by ``create_links_append``.
    """

    __author__ = 'Gabriele Marini'
    description = 'Auto and Formula 1'

    cover_url = 'http://www.auto.it/res/imgs/logo_Auto.png'

    title = u'Auto Prove'
    publisher = 'CONTE Editore'
    category = 'Sport'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 60
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 100

    remove_javascript = True
    no_stylesheets = True

    # Only these elements of a downloaded page are kept.
    keep_only_tags = [
        dict(name='h2', attrs={'class': ['tit_Article y_Txt']}),
        dict(name='h2', attrs={'class': ['tit_Article']}),
        dict(name='div', attrs={'class': ['box_Img newsdet_new ']}),
        dict(name='div', attrs={'class': ['box_Img newsdet_as ']}),
        dict(name='table', attrs={'class': ['table_A']}),
        dict(name='div', attrs={'class': ['txt_Article txtBox_cms']}),
        dict(name='testoscheda')]

    def parse_index(self):
        """Build the list of sections from the road-test RSS feed.

        Every ``<item>`` found in a feed becomes one section, titled after
        the item, whose articles come from ``create_links_append``.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``description`` and ``date``. The list is empty when
            no feed yields any item.
        """
        # (label, RSS url) pairs. The label is informational only: sections
        # are named after the individual feed items, not after the feed.
        sources = [
            ('Prove su Strada', 'http://www.auto.it/rss/prove+6.xml'),
        ]

        feeds = []
        for source_label, url in sources:
            channel = self.index_to_soup(url).find('channel')
            if channel is None:
                # The response was not an RSS document; skip this source
                # instead of crashing on the missing <channel> element.
                self.log.warn(
                    'No <channel> element found in %s (%s), skipping'
                    % (url, source_label))
                continue

            for item in channel.findAllNext('item'):
                section_title = self.tag_to_string(item.title)
                date = self.tag_to_string(item.pubDate)
                summary = self.tag_to_string(item.description)
                link = self.tag_to_string(item.guid)
                if not link:
                    # Without a link no page URL can be derived.
                    continue
                articles = self.create_links_append(link, date, summary)
                if articles:
                    feeds.append((section_title, articles))

        # Return only after every source has been processed.
        return feeds

    def create_links_append(self, link, date, description):
        """Return the article entries for all pages of one road test.

        Args:
            link: URL of the test's main page. Sub-page URLs are obtained by
                replacing every occurrence of ``'scheda'`` in it.
            date: Date string attached to the main page entry.
            description: Description attached to the main page entry.

        Returns:
            A list of dicts with the keys ``title``, ``url``,
            ``description`` and ``date``: the main page ('Generale') first,
            followed by one entry per sub-page. Sub-page entries use their
            title as description and an empty date.
        """
        # (article title, slug that replaces 'scheda' in the main URL)
        subpages = (
            ('Design', 'design'),
            ('Interni', 'interni'),
            ('Tecnica', 'tecnica'),
            ('Su Strada', 'su_strada'),
            ('Pagella', 'pagella'),
            ('Rilevamenti', 'telemetria'),
        )

        current_articles = [{
            'title': 'Generale',
            'url': link,
            'description': description,
            'date': date,
        }]
        for page_title, slug in subpages:
            current_articles.append({
                'title': page_title,
                'url': link.replace('scheda', slug),
                'description': page_title,
                'date': '',
            })
        return current_articles
 |