mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #889294 (updated Metro NL)
This commit is contained in:
parent
86e5c79180
commit
e0a86fcc38
@ -1,3 +1,4 @@
|
|||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
import re
|
import re
|
||||||
from calibre.utils.magick import Image
|
from calibre.utils.magick import Image
|
||||||
@ -8,21 +9,27 @@ from calibre.utils.magick import Image
|
|||||||
version 1.4 Updated tags, delay and added autoclean 22-09-2011
|
version 1.4 Updated tags, delay and added autoclean 22-09-2011
|
||||||
version 1.5 Changes due to changes in site
|
version 1.5 Changes due to changes in site
|
||||||
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
|
version 1.6 Added css, removed auto cleanup, added buitenland section, added use_embedded_content, added remove_attributes
|
||||||
Added som processing on pictures
|
Added some processing on pictures
|
||||||
Removed links in html
|
Removed links in html
|
||||||
Removed extra white characters
|
Removed extra white characters
|
||||||
changed handling of self closing span
|
changed handling of self closing span
|
||||||
'''
|
Version 1.7 11-11-2011 Changed oldest_article back to 1.5
|
||||||
|
changed è into &egrave;
|
||||||
|
updated remove tags
|
||||||
|
removed keep_only tags
|
||||||
|
'''
|
||||||
|
|
||||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||||
title = u'Metro Nieuws NL'
|
title = u'Metro Nieuws NL'
|
||||||
oldest_article = 2
|
oldest_article = 1.5
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
__author__ = u'DrMerry'
|
__author__ = u'DrMerry'
|
||||||
description = u'Metro Nederland'
|
description = u'Metro Nederland'
|
||||||
language = u'nl'
|
language = u'nl'
|
||||||
simultaneous_downloads = 5
|
simultaneous_downloads = 5
|
||||||
|
timeout = 2
|
||||||
#delay = 1
|
#delay = 1
|
||||||
|
center_navbar = True
|
||||||
#auto_cleanup = True
|
#auto_cleanup = True
|
||||||
#auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
|
#auto_cleanup_keep = '//div[@class="article-image-caption-2column"]/*|//div[@id="date"]/*|//div[@class="article-image-caption-3column"]/*'
|
||||||
timefmt = ' [%A, %d %b %Y]'
|
timefmt = ' [%A, %d %b %Y]'
|
||||||
@ -31,31 +38,32 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
|
cover_url = 'http://www.oldreadmetro.com/img/en/metroholland/last/1/small.jpg'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
remove_tags_before = dict(name='div', attrs={'id':'date'})
|
remove_tags_before = dict(id='date')
|
||||||
remove_tags_after = dict(name='div', attrs={'class':'article-body'})
|
remove_tags_after = dict(name='div', attrs={'class':'article-body'})
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
remove_attributes = ['style', 'font', 'width', 'height']
|
remove_attributes = ['style', 'font', 'width', 'height']
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
conversion_options = {
|
||||||
|
'authors' : 'Metro Nederland',
|
||||||
|
'author_sort' : 'Metro Nederland',
|
||||||
|
'publisher' : 'DrMerry/Metro Nederland'
|
||||||
|
}
|
||||||
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
|
extra_css = 'body {padding:5px 0px; background:#fff;font-size: 13px;}\
|
||||||
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
|
#date {clear: both;margin-left: 19px;font-size: 11px;font-weight: 300;color: #616262;height: 15px;}\
|
||||||
.article-box-fact.module-title {clear:both;border-top:1px solid black;border-bottom:4px solid black;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
|
.article-box-fact.module-title {clear:both;padding: 8px 0;color: #24763b;font-family: arial, sans-serif;font-size: 14px;font-weight: bold;}\
|
||||||
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;line-height: 1.15;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
|
h1.title {color: #000000;font-size: 44px;padding-bottom: 10px;font-weight: 300;} h2.subtitle {font-size: 13px;font-weight: 700;padding-bottom: 10px;}\
|
||||||
.article-body p{padding-bottom:10px;}div.column-1-3{float: left;display: inline;width: 567px;margin-left: 19px;border-right: 1px solid #CACACA;padding-right: 9px;}\
|
.article-body p{padding-bottom:10px;}div.column-1-3{margin-left: 19px;padding-right: 9px;}\
|
||||||
div.column-1-2 {float: left;display: inline;width: 373px;padding-right: 7px;border-right: 1px solid #CACACA;}\
|
div.column-1-2 {display: inline;padding-right: 7px;}\
|
||||||
p.article-image-caption {font-size: 12px;font-weight: 300;line-height: 1.4;color: #616262;margin-top: 5px;} \
|
p.article-image-caption {font-size: 12px;font-weight: 300;color: #616262;margin-top: 5px;} \
|
||||||
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
|
p.article-image-caption .credits {font-style: italic;font-size: 10px;}\
|
||||||
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
|
div.article-image-caption {width: 246px;margin-bottom: 5px;margin-left: 10px;}\
|
||||||
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
|
div.article-image-caption-2column {margin-bottom: 10px;width: 373px;} div.article-image-caption-3column {}\
|
||||||
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
|
img {border:0px;} .img-mask {position:absolute;top:0px;left:0px;}'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class':[ 'article-image-caption-2column', 'article-image-caption-3column', 'article-body', 'article-box-fact']}),
|
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap', 'related-links'
|
||||||
dict(name='div', attrs={'id':['date']}),
|
|
||||||
dict(name='h1', attrs={'class':['title']}),
|
|
||||||
dict(name='h2', attrs={'class':['subtitle']})]
|
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'class':[ 'metroCommentFormWrap',
|
|
||||||
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
|
'commentForm', 'metroCommentInnerWrap', 'article-slideshow-counter-container', 'article-slideshow-control', 'ad', 'header-links',
|
||||||
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools', 'article-page-auto-pushes', 'footer-edit']}),
|
'art-rgt','pluck-app pluck-comm', 'share-and-byline', 'article-tools-below-title', 'col-179 ', 'related-links', 'clear padding-top-15', 'share-tools',
|
||||||
|
'article1','article-page-auto-pushes', 'footer-edit','clear']}),
|
||||||
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
|
dict(name='div', attrs={'id':['article-2', 'article-4', 'article-1', 'navigation', 'footer', 'header', 'comments', 'sidebar', 'share-and-byline']}),
|
||||||
dict(name='iframe')]
|
dict(name='iframe')]
|
||||||
|
|
||||||
@ -70,26 +78,8 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
|||||||
iurl = tag['src']
|
iurl = tag['src']
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(iurl)
|
img.open(iurl)
|
||||||
#width, height = img.size
|
|
||||||
#print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
|
|
||||||
img.trim(0)
|
img.trim(0)
|
||||||
img.save(iurl)
|
img.save(iurl)
|
||||||
'''
|
|
||||||
#width, height = img.size
|
|
||||||
#print '***TRIMMED img width is: ', width, 'height is: ', height
|
|
||||||
left=0
|
|
||||||
top=0
|
|
||||||
border_color='#ffffff'
|
|
||||||
width, height = img.size
|
|
||||||
#print '***retrieved img width is: ', width, 'height is: ', height
|
|
||||||
height_correction = 1.17
|
|
||||||
canvas = create_canvas(width, height*height_correction,border_color)
|
|
||||||
canvas.compose(img, left, top)
|
|
||||||
#img = canvas
|
|
||||||
canvas.save(iurl)
|
|
||||||
#width, height = canvas.size
|
|
||||||
#print '***NEW img width is: ', width, 'height is: ', height
|
|
||||||
'''
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user