diff --git a/recipes/metro_news_nl.recipe b/recipes/metro_news_nl.recipe index 2ad09ded8d..9191f7caec 100644 --- a/recipes/metro_news_nl.recipe +++ b/recipes/metro_news_nl.recipe @@ -27,70 +27,66 @@ from BeautifulSoup import BeautifulSoup Version 1.9.1 18-04-2012 removed some debug settings updated code to match new metro-layout - Version 1.9.2 14-04-2012 + Version 1.9.2 24-04-2012 updated code to match new metro-layout + Version 1.9.3 25-04-2012 + Changed a lot of custom code into calibre code as the default code of calibre has become much faster since the first version of this recipe + Added new feeds + Updated css + Changed order of regex to speed up process ''' class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Metro Nieuws NL' oldest_article = 1.2 max_articles_per_feed = 25 - __author__ = u'DrMerry' - description = u'Metro Nederland' - language = u'nl' - simultaneous_downloads = 3 + __author__ = u'DrMerry' + description = u'Metro Nederland' + language = u'nl' + simultaneous_downloads = 5 masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif' timeout = 10 - center_navbar = True - timefmt = ' [%A, %d %b %Y]' + center_navbar = True + timefmt = ' [%A, %d %b %Y]' no_stylesheets = True remove_javascript = True remove_empty_feeds = True - cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg' + cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg' publication_type = 'newspaper' - encoding = 'utf-8' - remove_attributes = ['style', 'font', 'width', 'height'] + encoding = 'utf-8' + remove_attributes = ['style', 'font', 'width', 'height', 'itemtype', 'itemprop', 'itemscope']#, 'href'] use_embedded_content = False - conversion_options = { - 'authors' : 'Metro Nederland & calibre & DrMerry', - 'author_sort' : 'Metro Nederland & calibre & DrMerry', - 'publisher' : 'DrMerry/Metro Nederland' - } - extra_css = 'body {padding:5px 0; background-color:#fff;font-size: 1em}\ - #date, 
div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {margin-bottom: 10px}\ - #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name, p.article-image-caption .credits {font-size:0.5em}\ - .article-box-fact.module-title, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {clear:both}\ - .article-box-fact.module-title {padding: 8px 0}\ - h1.title {color: #000;font-size: 1.4em}\ - .article-box-fact.module-title, h2.subtitle {font-size: 1.2em}\ - h1.title, h2.subtitle, .article-body p{padding-bottom:10px}\ - h1.title, p.article-image-caption {font-weight: 300}\ - div.column-1-3{margin-left: 19px;padding-right: 9px}\ - div.column-1-2 {display: inline;padding-right: 7px}\ - p.article-image-caption {font-size: 0.6em;margin-top: 5px}\ - p.article-image-caption, #date, div.share-and-byline div.byline div.text div.title, div.share-and-byline div.byline div.text div.name {color: #616262}\ - p.article-image-caption .credits {font-style: italic}\ - div.article-image-caption {width: 246px;margin: 5px}\ - div.article-image-caption-2column {width: 373px}\ - div.article-image-caption-2column, div.article-image-caption-3column {margin-bottom: 5px}\ - img {border:0}\ - img, div.column-3 {padding:2px}\ - hr.merryhr {width:30%; border-width:0; margin-left:5px; background-color: #24763b}\ - div.column-3 {background-color:#eee; width:50%; margin:2px; float:right}\ - div.column-3 module-title {border: 1px solid #aaa}\ - div.article-box-fact div.subtitle, .article-box-fact.module-title, h2.subtitle {font-weight:bold}\ - div.article-box-fact div.subtitle, hr.merryhr, .article-box-fact.module-title {color: #24763b}' + extra_css = 'body{font-size:1em;padding:5px 0}body,a,h2{background-color:#fff;text-decoration:none;color:#000}#date,div.byline,p.article-image-caption 
.credits,.calibrenavbar{font-size:.5em}.article-box-fact.module-title,#date,div.byline{clear:both}.article-box-fact.module-title{margin:8px 0}.article-box-fact.module-title,h2{font-size:1.1em}h1.title{font-size:1.4em}h1.title,.article-body p,div.article-image-caption-2column,div.article-image-caption-3column,#date,div.byline{margin-bottom:.6em}div.article-box-fact div.subtitle,.article-box-fact.module-title,h1.title,p.article-image-caption{font-weight:700}div.column-1-3{margin-left:19px}div.column-1-2{display:inline}div.column-1-2,div.column-1-3{margin-right:7px}p.article-image-caption{font-size:.6em;margin-top:5px}p.article-image-caption,#date,div.byline{color:#616262}p.article-image-caption .credits{font-style:italic}div.article-image-caption{width:246px}div.article-image-caption-2column{width:373px}div.column-3{background-color:#eee;float:right;width:50%}div.column-3 module-title{border:1px solid #aaa}div.article-box-fact div.subtitle,.article-box-fact.module-title{color:#24763b}div.byline{border-top:2px solid #24763b}div.column-3,img,div.column-3,p.small,div.article-image-caption{margin:.5em}img,p.small,.column1,h2{border:0;padding:0}.column1,h1,h2{margin:0}' preprocess_regexps = [ - (re.compile(r']+top-line[^>]+>', re.DOTALL|re.IGNORECASE), - lambda match: '
'), - (re.compile(r']+(metronieuws\.nl/[^>]+/templates/[^>]+jpe?g|metronieuws\.nl/internal\-roxen\-unit\.gif)[^>]+>', re.DOTALL|re.IGNORECASE), - lambda match: ''), + (re.compile(r'( |\s|]+metronieuws\.nl/([^>]+/templates/[^>]+\.jpe?g|internal\-roxen\-unit\.gif)[^>]+>)', re.DOTALL|re.IGNORECASE),lambda match: ' '), + #(re.compile(r'( |\s)+', re.DOTALL|re.IGNORECASE),lambda match:' '), + #(re.compile(r'<(a |/a)[^>]*>', re.DOTALL|re.IGNORECASE),lambda match:'') + #(re.compile('(