from calibre.web.feeds.news import BasicNewsRecipe
import re
from calibre.utils.magick import Image, create_canvas


class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    """Recipe for the 'Geek and Poke' cartoon blog.

    Articles are read from the blog's FeedBurner feed.  The downloaded
    HTML is cleaned with ``preprocess_regexps`` and every image is
    re-saved on a taller white canvas by ``postprocess_html``.
    """

    title = u'Geek and Poke'
    __author__ = u'DrMerry'
    description = u'Geek and Poke Cartoons'
    publisher = u'Oliver Widder'
    author = u'Oliver Widder, DrMerry (calibre-code), calibre'
    oldest_article = 31
    max_articles_per_feed = 100
    language = u'en'
    simultaneous_downloads = 1
    # delay = 1
    timefmt = ' [%a, %d %B, %Y]'
    summary_length = -1
    no_stylesheets = True
    category = 'News.IT, Cartoon, Humor, Geek'
    use_embedded_content = False
    cover_url = 'http://geekandpoke.typepad.com/aboutcoders.jpeg'
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'blog'
    masthead_url = None

    # Built from the attributes above, so it must stay below them.
    conversion_options = {
        'comments': '',
        'tags': category,
        'language': language,
        'publisher': publisher,
        'author': author,
    }

    remove_tags_before = dict(name='p', attrs={'class': 'content-nav'})
    remove_tags_after = dict(name='div', attrs={'class': 'entry-content'})
    remove_tags = [
        dict(name='div', attrs={'class': 'entry-footer'}),
        dict(name='div', attrs={'id': 'alpha'}),
        dict(name='div', attrs={'id': 'gamma'}),
        dict(name='iframe'),
        dict(name='p', attrs={'class': 'content-nav'}),
    ]

    # Every dot is escaped so that '.' only matches a literal dot
    # (the quantserve entry previously had an unescaped one).
    filter_regexps = [
        r'feedburner\.com',
        r'pixel\.quantserve\.com',
        r'googlesyndication\.com',
        r'yimg\.com',
        r'scorecardresearch\.com',
    ]

    # NOTE(review): the copy of this file that was reviewed had all
    # '<...>' markup stripped from the patterns and replacement strings
    # below, which left them syntactically invalid.  They have been
    # reconstructed from the surviving fragments and from what the lambdas
    # do with the match groups.  Tag names and entities are best guesses --
    # confirm each entry against the upstream recipe before relying on it.
    preprocess_regexps = [
        # Drop empty paragraphs and the "Tweet" share link.
        # NOTE(review): the corrupted pattern had four more alternatives
        # (roughly '<?[^>]*>', two bare tags, and '<?[^>]*>[^<]*</?>[^<]*')
        # whose tag names could not be recovered; they are omitted here.
        (re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>)',
                    re.DOTALL | re.IGNORECASE),
         lambda match: ''),
        # Collapse runs of non-breaking/double whitespace to one space.
        # NOTE(review): '&nbsp;' is assumed; only a blank survived.
        (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL | re.IGNORECASE),
         lambda match: ' '),
        # Unwrap the link inside a header, keeping the header tag and text.
        # NOTE(review): 'h3' is assumed from the h3.entry-header rule in
        # extra_css; the surviving fragment was '(]*>)]>((?!'.
        (re.compile(r'(<h3[^>]*>)\s*<a[^>]*>((?:(?!</a).)*)</a>\s*</h3>',
                    re.DOTALL | re.IGNORECASE),
         lambda match: match.group(1) + match.group(2) + '</h3>'),
        # Print each image's alt text on its own line above the image.
        # NOTE(review): the wrapping markup was lost; '<br />' is assumed.
        (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)',
                    re.DOTALL | re.IGNORECASE),
         lambda match: ('<br />' + match.group(2) + '<br />'
                        + match.group(1) + '<br />')),
        # Collapse consecutive line breaks into one.
        # NOTE(review): only '()+' survived; '<br>' is assumed.
        (re.compile(r'(<br\s*/?>)+', re.DOTALL | re.IGNORECASE),
         lambda match: '<br />'),
    ]

    extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'

    def postprocess_html(self, soup, first):
        """Trim every image and re-save it on a taller white canvas.

        For each ``<img>`` that has a ``src`` attribute, the file named by
        ``src`` is opened, trimmed, composed at the top-left corner of a
        white canvas 1.17 times the trimmed height, and written back to the
        same ``src`` path.

        :param soup: parsed article HTML; only the image files it points
            to are modified.
        :param first: not used by this implementation.
        :return: the same ``soup`` object.
        """
        border_color = '#ffffff'
        height_correction = 1.17

        # ``src=True`` selects only tags that carry a src attribute; it
        # replaces the ``tag.has_key('src')`` check, which is not available
        # on Python 3 / BeautifulSoup 4 tags.
        for tag in soup.findAll('img', src=True):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            img.trim(0)
            # Size is read after trimming so the canvas fits the trimmed
            # picture.
            width, height = img.size
            canvas = create_canvas(width, height * height_correction,
                                   border_color)
            canvas.compose(img, 0, 0)
            # Overwrite the original file so the existing src stays valid.
            canvas.save(iurl)
        return soup

    feeds = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']