...

2025-08-30 23:00:21 -04:00 · 2011-11-26 07:58:02 +05:30 · 2011-11-26 07:58:02 +05:30 · d134be5b1b
commit d134be5b1b
parent ac8cbdd168
1 changed files with 25 additions and 20 deletions
--- a/recipes/independent.recipe
+++ b/recipes/independent.recipe
@@ -39,7 +39,9 @@ class TheIndependentNew(BasicNewsRecipe):
    encoding                = 'utf-8'
    remove_tags             =[
                               dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
-                               dict(attrs={'class' : ['autoplay','openBiogPopup']})
+                               dict(attrs={'class' : ['autoplay','openBiogPopup']}),
                               dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
                               dict(attrs={'style' : re.compile('.*')}),
                             ]
    keep_only_tags          =[dict(attrs={'id':'main'})]
@@ -113,6 +115,7 @@ class TheIndependentNew(BasicNewsRecipe):
                    return None
        items_to_extract = []
        slideshow_elements = []
        for item in soup.findAll(attrs={'class' : re.compile("widget.*")}):
            remove = True
@@ -131,6 +134,7 @@ class TheIndependentNew(BasicNewsRecipe):
            if (pattern.search(item['class'])) is not None:
                if self._FETCH_IMAGES:
                    remove = False
                    slideshow_elements.append(item)
                else:
                    remove = True
@@ -148,7 +152,8 @@ class TheIndependentNew(BasicNewsRecipe):
        items_to_extract = []
        if self._FETCH_IMAGES:
-            for item in soup.findAll('a',attrs={'href' : re.compile('.*')}):
+            for element in slideshow_elements:
                for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
                    if item.img is not None:
                        #use full size image
                        img = item.findNext('img')
@@ -156,7 +161,7 @@ class TheIndependentNew(BasicNewsRecipe):
                        img['src'] = item['href']
                        #insert caption if available
-                    if img['title'] is not None and (len(img['title']) > 1):
+                        if img.get('title') and (len(img['title']) > 1):
                            tag = Tag(soup,'h3')
                            text = NavigableString(img['title'])
                            tag.insert(0,text)
@@ -283,7 +288,7 @@ class TheIndependentNew(BasicNewsRecipe):
        items_to_extract = []
        for item in soup.findAll('div', attrs={'class' : 'image'}):
            img = item.findNext('img')
-            if img is not None and img['src'] is not None:
+            if img and img.get('src'):
                # broken images still point to remote url
                pattern = re.compile('http://www.independent.co.uk.*')
                if pattern.match(img["src"]) is not None: