mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 02:27:01 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			64 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			64 lines
		
	
	
		
			2.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| __license__ = 'GPL v3'
 | |
| __copyright__ = '2013, Alexander Schremmer <alex@alexanderweb.de>, Robert Riemann <robert@riemann.cc>'
 | |
| 
 | |
| import re
 | |
| 
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| 
 | |
| 
 | |
class TazRSSRecipe(BasicNewsRecipe):
    """News recipe for taz (die tageszeitung), assembled from the taz.de RSS feeds.

    The class is purely declarative: it only sets attributes that the
    ``BasicNewsRecipe`` base class reads; it defines no methods of its own.
    """

    # ---- Publication metadata -------------------------------------------
    __author__ = 'Alexander Schremmer, Robert Riemann'
    title = 'Taz - die Tageszeitung'
    description = 'Taz.de - die tageszeitung (Anpassung von Robert)'
    publisher = 'taz Entwicklungs GmbH & Co. Medien KG'
    publication_type = 'newspaper'
    category = 'news, Germany'
    language = 'de'
    lang = 'de-DE'
    timefmt = ' [%a, %d %b %Y]'

    # Publisher and locale are forwarded to the conversion step as well.
    conversion_options = {
        'publisher': publisher,
        'language': lang,
    }

    # ---- Artwork ---------------------------------------------------------
    # Previously used logo:
    # http://galeria-autonomica.de/wp-content/uploads/a_taz-logo.gif
    masthead_url = 'http://upload.wikimedia.org/wikipedia/de/thumb/1/15/Die-Tageszeitung-Logo.svg/500px-Die-Tageszeitung-Logo.svg.png'
    # Use the cover presented on the homepage.
    cover_url = 'http://www.taz.de/digitaz/.s1jpeg320'

    # ---- Feed selection --------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    # Omit articles already linked in the Schlagzeilen (headlines) feed.
    ignore_duplicate_articles = {'title', 'url'}

    # (section title, RSS URL) pairs.
    feeds = [
        ('Schlagzeilen', 'http://www.taz.de/!p3270;rss/'),
        ('Politik', 'http://www.taz.de/Politik/!p2;rss/'),
        ('Zukunft', 'http://www.taz.de/Zukunft/!p4;rss/'),
        ('Netz', 'http://www.taz.de/Netz/!p5;rss/'),
        ('Debatte', 'http://www.taz.de/Debatte/!p9;rss/'),
        ('Leben', 'http://www.taz.de/Leben/!p10;rss/'),
        ('Sport', 'http://www.taz.de/Sport/!p12;rss/'),
        ('Wahrheit', 'http://www.taz.de/Wahrheit/!p13;rss/'),
        ('Berlin', 'http://www.taz.de/Berlin/!p14;rss/'),
        ('Nord', 'http://www.taz.de/Nord/!p11;rss/'),
    ]

    # ---- Article clean-up ------------------------------------------------
    # Default value is False, but True makes the process much faster.
    no_stylesheets = True

    # Keep only <div> elements whose class attribute contains the whole word
    # "sect_article".
    keep_only_tags = [
        {
            'name': ['div'],
            'attrs': {'class': re.compile(r'.*\bsect_article\b.*')},
        },
    ]

    remove_tags = [
        {'name': ['div'], 'attrs': {'class': 'sectfoot'}},
        # taz paywall
        {'name': ['div'], 'attrs': {'id': 'tzi_paywall'}},
    ]
 | |
| 
 | |
| # Disabled: with article pictures the result is super-slow on Kindle
 | |
| #    def populate_article_metadata(self, article, soup, first):
 | |
| #        if first and hasattr(self, 'add_toc_thumbnail'):
 | |
| #            picdiv = soup.find('img')
 | |
| #            if picdiv is not None:
 | |
| #                self.add_toc_thumbnail(article,picdiv['src'])
 |