# calibre/recipes/geek_poke.recipe
# 2019-03-22 23:47:49 +05:30

# 80 lines
# 3.4 KiB
# Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image, create_canvas
class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    """News recipe for the "Geek and Poke" cartoon blog.

    The recipe reads the blog's FeedBurner feed, strips navigation, footer,
    side-column and iframe markup from every article, rewrites the HTML with
    a handful of regular expressions (captions are generated from each
    image's ``alt`` text) and finally re-renders every image onto a taller
    white canvas in :meth:`postprocess_html`.
    """

    # --- Metadata -----------------------------------------------------
    title = u'Geek and Poke'
    __author__ = u'DrMerry'
    description = u'Geek and Poke Cartoons'
    publisher = u'Oliver Widder'
    author = u'Oliver Widder, DrMerry (calibre-code), calibre'
    category = 'News.IT, Cartoon, Humor, Geek'
    language = u'en'
    publication_type = 'blog'
    cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
    masthead_url = None
    timefmt = ' [%a, %d %B, %Y]'

    # --- Fetch settings -----------------------------------------------
    oldest_article = 31
    max_articles_per_feed = 100
    simultaneous_downloads = 1
    summary_length = -1
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    remove_javascript = True

    # Reuses the metadata defined above so the values stay in sync.
    conversion_options = {
        'comments': '',
        'tags': category,
        'language': language,
        'publisher': publisher,
        'author': author,
    }

    # --- Markup clean-up ----------------------------------------------
    remove_tags_before = dict(name='p', attrs={'class': 'content-nav'})
    remove_tags_after = dict(name='div', attrs={'class': 'entry-content'})
    remove_tags = [
        dict(name='div', attrs={'class': 'entry-footer'}),
        dict(name='div', attrs={'id': 'alpha'}),
        dict(name='div', attrs={'id': 'gamma'}),
        dict(name='iframe'),
        dict(name='p', attrs={'class': 'content-nav'}),
    ]

    # Host patterns for tracker / ad / feed-proxy links.  Every dot is
    # escaped so that it matches a literal '.' only.
    filter_regexps = [
        r'feedburner\.com',
        r'pixel\.quantserve\.com',
        r'googlesyndication\.com',
        r'yimg\.com',
        r'scorecardresearch\.com',
    ]

    preprocess_regexps = [
        # Drop empty paragraphs, "Tweet" links, every anchor open/close tag,
        # HTML comments and <h2> headings (plus the text trailing them).
        (re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->|<h2[^>]*>[^<]*</h2>[^<]*)', re.DOTALL | re.IGNORECASE),
         lambda match: ''),
        # Collapse runs of &nbsp; / repeated whitespace into a single space.
        (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL | re.IGNORECASE),
         lambda match: ' '),
        # Unwrap a link that encloses an <h3> heading's text.  The anchor
        # may carry any attributes and its content is captured up to the
        # first closing </a>.
        # NOTE(review): assuming the rules run in the listed order, the
        # first rule has already stripped all anchors, so this one acts
        # only as a safety net - confirm.
        (re.compile(r'(<h3[^>]*>)<a[^>]*>((?:(?!</a).)*)</a></h3>', re.DOTALL | re.IGNORECASE),
         lambda match: match.group(1) + match.group(2) + '</h3>'),
        # Wrap each image that has an alt attribute in a <div>, with the
        # alt text placed above it as a <cite> caption.
        (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL | re.IGNORECASE),
         lambda match: '<div id="merryImage"><cite>' + match.group(2) + '</cite><br>' + match.group(1) + '</div>'),
        # Reduce consecutive line breaks to one.
        (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL | re.IGNORECASE),
         lambda match: '<br>'),
    ]

    extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'

    def postprocess_html(self, soup, first):
        """Re-render every image of the page onto a taller white canvas.

        For each ``<img>`` carrying a ``src`` attribute the image is opened
        from that location, trimmed, pasted at the top-left corner of a new
        white canvas whose height is 1.17 times the trimmed height, and
        saved back to the same location.

        :param soup: parsed page; only its ``<img>`` tags are inspected.
        :param first: unused by this recipe.
        :return: the same ``soup`` object.
        """
        # Loop-invariant layout settings.
        left = 0
        top = 0
        border_color = '#ffffff'
        # NOTE(review): the reason for the 17% of extra height is not
        # recorded; presumably it leaves blank room below the cartoon.
        height_correction = 1.17

        for tag in soup.findAll('img', src=True):
            # assumes src is a path the image library can read and write,
            # i.e. an already-downloaded local file - TODO confirm
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            img.trim(0)
            # Measure only after trimming so the canvas fits the new size.
            width, height = img.size
            canvas = create_canvas(
                width, height * height_correction, border_color)
            canvas.compose(img, left, top)
            canvas.save(iurl)
        return soup

    feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']