mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix #1170798 (Metro NL site changed, updated recipe)
This commit is contained in:
parent
6be40de8f1
commit
00b8c40c63
@ -36,6 +36,9 @@ from BeautifulSoup import BeautifulSoup
|
|||||||
Changed order of regex to speed up process
|
Changed order of regex to speed up process
|
||||||
Version 1.9.3 23-05-2012
|
Version 1.9.3 23-05-2012
|
||||||
Updated Cover image
|
Updated Cover image
|
||||||
|
Version 1.9.4 19-04-2013
|
||||||
|
Added regex filter for mailto
|
||||||
|
Updated for new layout of metro-site
|
||||||
'''
|
'''
|
||||||
|
|
||||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||||
@ -43,7 +46,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
oldest_article = 1.2
|
oldest_article = 1.2
|
||||||
max_articles_per_feed = 25
|
max_articles_per_feed = 25
|
||||||
__author__ = u'DrMerry'
|
__author__ = u'DrMerry'
|
||||||
description = u'Metro Nederland'
|
description = u'Metro Nederland v1.9.4 2013-04-19'
|
||||||
language = u'nl'
|
language = u'nl'
|
||||||
simultaneous_downloads = 5
|
simultaneous_downloads = 5
|
||||||
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
|
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
|
||||||
@ -68,13 +71,17 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
#(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em')
|
#(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em')
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_before= dict(id='date')
|
remove_tags_before= dict(id='subwrapper')
|
||||||
remove_tags_after = [dict(name='div', attrs={'class':['column-1-3','gallery-text']})]#id='share-and-byline')]
|
remove_tags_after = dict(name='div', attrs={'class':['body-area','article-main-area']})
|
||||||
|
#name='div', attrs={'class':['subwrapper']})]
|
||||||
|
#'column-1-3','gallery-text']})]#id='share-and-byline')]
|
||||||
|
|
||||||
|
filter_regexps = [r'mailto:.*']
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['iframe','script','noscript','style']),
|
dict(name=['iframe','script','noscript','style']),
|
||||||
dict(name='div', attrs={'class':['column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
|
dict(name='div', attrs={'class':['aside clearfix','aside clearfix middle-col-line','comments','share-tools','article-right-column','column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
|
||||||
dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1']),
|
dict(id=['article-2','googleads','column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1','sharez_container','ts-container','topshares','ts-title']),
|
||||||
dict(name='a', attrs={'name':'comments'}),
|
dict(name='a', attrs={'name':'comments'}),
|
||||||
#dict(name='div', attrs={'data-href'}),
|
#dict(name='div', attrs={'data-href'}),
|
||||||
dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}),
|
dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user