From e0a86fcc386cc5bb18ff9507cfd1b3b2044372b3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 12 Nov 2011 08:30:19 +0530 Subject: [PATCH] Fix #889294 (updated Metro NL) --- recipes/metro_news_nl.recipe | 58 +++++++++++++++--------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/recipes/metro_news_nl.recipe b/recipes/metro_news_nl.recipe index 3d12128e29..ce54f6099c 100644 --- a/recipes/metro_news_nl.recipe +++ b/recipes/metro_news_nl.recipe @@ -1,3 +1,4 @@ +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai from calibre.web.feeds.news import BasicNewsRecipe import re from calibre.utils.magick import Image @@ -8,21 +9,27 @@ from calibre.utils.magick import Image version 1.4 Updated tags, delay and added autoclean 22-09-2011 version 1.5 Changes due to changes in site version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes - Added som processing on pictures + Added some processing on pictures Removed links in html Removed extre white characters changed handling of self closing span - ''' + Version 1.7 11-11-2011 Changed oldest_article back to 1.5 + changed รจ into è + updated remove tags + removed keep_only tags +''' class AdvancedUserRecipe1306097511(BasicNewsRecipe): title = u'Metro Nieuws NL' - oldest_article = 2 + oldest_article = 1.5 max_articles_per_feed = 100 __author__ = u'DrMerry' description = u'Metro Nederland' language = u'nl' simultaneous_downloads = 5 + timeout = 2 #delay = 1 + center_navbar = True #auto_cleanup = True #auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*' timefmt = ' [%A, %d %b %Y]' @@ -31,31 +38,32 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): remove_empty_feeds = True cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg' publication_type = 'newspaper' - remove_tags_before = dict(name='div', attrs={'id':'date'}) + 
remove_tags_before = dict(id='date') remove_tags_after = dict(name='div', attrs={'class':'article-body'}) encoding = 'utf-8' remove_attributes = ['style', 'font', 'width', 'height'] use_embedded_content = False + conversion_options = { + 'authors' : 'Metro Nederland', + 'author_sort' : 'Metro Nederland', + 'publisher' : 'DrMerry/Metro Nederland' + } extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\ #date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\ - .article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\ - h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\ - .article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\ - div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\ - p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \ + .article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\ + h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\ + .article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\ + div.column-1-2 {display: inline;padding-right: 7px;}\ + p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \ p.article-image-caption .credits {font-style: italic;font-size: 10px;}\ div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\ 
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\ img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}' - keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}), - dict(name='div', attrs={'id':['date']}), - dict(name='h1', attrs={'class':['title']}), - dict(name='h2', attrs={'class':['subtitle']})] - - remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', + remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', 'related-links', 'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links', - 'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}), + 'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', + 'article1','article-page-auto-pushes', 'footer-edit','clear']}), dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}), dict(name='iframe')] @@ -70,26 +78,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe): iurl = tag['src'] img = Image() img.open(iurl) - #width, height = img.size - #print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height img.trim(0) img.save(iurl) - ''' - #width, height = img.size - #print '***TRIMMED img width is: ', width, 'height is: ', height - left=0 - top=0 - border_color='#ffffff' - width, height = img.size - #print '***retrieved img width is: ', width, 'height is: ', height - height_correction = 1.17 - canvas = create_canvas(width, height*height_correction,border_color) - canvas.compose(img, left, top) - #img = canvas - 
canvas.save(iurl) - #width, height = canvas.size - #print '***NEW img width is: ', width, 'height is: ', height - ''' return soup feeds = [