mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 10:37:00 -04:00 
			
		
		
		
	Fix #1170798 (Metro NL site changed, updated recipe)
This commit is contained in:
		
							parent
							
								
									6be40de8f1
								
							
						
					
					
						commit
						00b8c40c63
					
				| @ -36,6 +36,9 @@ from BeautifulSoup import BeautifulSoup | |||||||
|     Changed order of regex to speed up process |     Changed order of regex to speed up process | ||||||
|  Version 1.9.3 23-05-2012 |  Version 1.9.3 23-05-2012 | ||||||
|     Updated Cover image |     Updated Cover image | ||||||
|  |  Version 1.9.4 19-04-2013 | ||||||
|  |     Added regex filter for mailto | ||||||
|  |     Updated for new layout of metro-site | ||||||
| ''' | ''' | ||||||
| 
 | 
 | ||||||
| class AdvancedUserRecipe1306097511(BasicNewsRecipe): | class AdvancedUserRecipe1306097511(BasicNewsRecipe): | ||||||
| @ -43,7 +46,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): | |||||||
|     oldest_article = 1.2 |     oldest_article = 1.2 | ||||||
|     max_articles_per_feed = 25 |     max_articles_per_feed = 25 | ||||||
|     __author__  = u'DrMerry' |     __author__  = u'DrMerry' | ||||||
|     description = u'Metro Nederland' |     description = u'Metro Nederland v1.9.4 2013-04-19' | ||||||
|     language = u'nl' |     language = u'nl' | ||||||
|     simultaneous_downloads = 5 |     simultaneous_downloads = 5 | ||||||
|     masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif' |     masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif' | ||||||
| @ -68,13 +71,17 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): | |||||||
|         #(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em') |         #(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em') | ||||||
|         ] |         ] | ||||||
|          |          | ||||||
|     remove_tags_before= dict(id='date') |     remove_tags_before= dict(id='subwrapper') | ||||||
|     remove_tags_after = [dict(name='div', attrs={'class':['column-1-3','gallery-text']})]#id='share-and-byline')] |     remove_tags_after = dict(name='div', attrs={'class':['body-area','article-main-area']}) | ||||||
|  | #name='div', attrs={'class':['subwrapper']})] | ||||||
|  | #'column-1-3','gallery-text']})]#id='share-and-byline')] | ||||||
|  | 
 | ||||||
|  |     filter_regexps = [r'mailto:.*'] | ||||||
| 
 | 
 | ||||||
|     remove_tags = [ |     remove_tags = [ | ||||||
|         dict(name=['iframe','script','noscript','style']), |         dict(name=['iframe','script','noscript','style']), | ||||||
|         dict(name='div', attrs={'class':['column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}), |         dict(name='div', attrs={'class':['aside clearfix','aside clearfix middle-col-line','comments','share-tools','article-right-column','column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}), | ||||||
|         dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1']), |         dict(id=['article-2','googleads','column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1','sharez_container','ts-container','topshares','ts-title']), | ||||||
|         dict(name='a', attrs={'name':'comments'}), |         dict(name='a', attrs={'name':'comments'}), | ||||||
|         #dict(name='div', attrs={'data-href'}), |         #dict(name='div', attrs={'data-href'}), | ||||||
|         dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}), |         dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}), | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user