Fix #872945 (Geek and poke small update)

2025-07-09 03:04:10 -04:00 · 2011-10-13 08:06:05 +05:30 · 2011-10-13 08:06:05 +05:30 · dff33cf99b
commit dff33cf99b
parent b6fab126dc
1 changed files with 22 additions and 11 deletions
--- a/recipes/geek_poke.recipe
+++ b/recipes/geek_poke.recipe
@@ -1,6 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
-from calibre.utils.magick import Image
+from calibre.utils.magick import Image, create_canvas
 class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    title          = u'Geek and Poke'
@@ -11,7 +11,7 @@ class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    oldest_article = 31
    max_articles_per_feed = 100
    language       = u'en'
-    simultaneous_downloads = 5
+    simultaneous_downloads = 1
    #delay          = 1
    timefmt        = ' [%a, %d %B, %Y]'
    summary_length = -1
@@ -22,6 +22,7 @@ class AdvancedUserRecipe1307556816(BasicNewsRecipe):
    remove_javascript = True
    remove_empty_feeds = True
    publication_type = 'blog'
    masthead_url = None
    conversion_options = {
                            'comments'         : ''
                            ,'tags'            : category
@@ -44,28 +45,38 @@ class AdvancedUserRecipe1307556816(BasicNewsRecipe):
                        (r'yimg\.com'),
                        (r'scorecardresearch\.com')]
-    preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),lambda match: ''),
+    preprocess_regexps = [(re.compile(r'(<p>(&nbsp;|\s)*</p>|<a[^>]*>Tweet</a>|<a[^>]*>|</a>|<!--.*?-->|<h2[^>]*>[^<]*</h2>[^<]*)', re.DOTALL|re.IGNORECASE),lambda match: ''),
                        (re.compile(r'(&nbsp;|\s\s)+\s*', re.DOTALL|re.IGNORECASE),lambda match: ' '),
                        (re.compile(r'<h2[^>]*>([^<]*)</h2>[^>]*(<div[^>]*>)', re.DOTALL|re.IGNORECASE), lambda match: match.group(2) + '<div id="MERRYdate">' + match.group(1) + '</div>'),
                        (re.compile(r'(<h3[^>]*>)<a[^>]>((?!</a)*)</a></h3>', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + match.group(2) + '</h3>'),
-                        (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL|re.IGNORECASE),lambda match: match.group(1) + '<br><cite>' + match.group(2) + '</cite>'),
+                        (re.compile(r'(<img[^>]*alt="([^"]*)"[^>]*>)', re.DOTALL|re.IGNORECASE),lambda match: '<div id="merryImage"><cite>' + match.group(2) + '</cite><br>' + match.group(1) + '</div>'),
                        (re.compile(r'<br( /)?>(<br( /)?>)+', re.DOTALL|re.IGNORECASE),lambda match: '<br>'),
                        (re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')
                        ]
-    extra_css = 'body, h3, p, #MERRYdate, h1, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em} #MERRYdate {font-size: 0.5em}'
+    extra_css = 'body, h3, p, div, span{margin:0px; padding:0px} h3.entry-header{font-size: 0.8em} div.entry-body{font-size: 0.7em}'
    def postprocess_html(self, soup, first):
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
-            width, height = img.size
+            #width, height = img.size
-            #print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+            #print '***img is: ', iurl, '\n****width is: ', width, 'height is: ', height
            img.trim(0)
-            img.save(iurl)
+            #width, height = img.size
            #print '***TRIMMED img width is: ', width, 'height is: ', height
            left=0
            top=0
            border_color='#ffffff'
            width, height = img.size
-            #print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
+            #print '***retrieved img width is: ', width, 'height is: ', height
            height_correction = 1.17
            canvas = create_canvas(width, height*height_correction,border_color)
            canvas.compose(img, left, top)
            #img = canvas
            #img.save(iurl)
            canvas.save(iurl)
            #width, height = canvas.size
            #print '***NEW img width is: ', width, 'height is: ', height
        return soup
    feeds          = ['http://feeds.feedburner.com/GeekAndPoke?format=xml']