mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update The Independent
This commit is contained in:
parent
47a1649a22
commit
c2edf7a890
@ -15,6 +15,10 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
#Flag to enable/disable image fetching (not business)
|
#Flag to enable/disable image fetching (not business)
|
||||||
_FETCH_IMAGES = True
|
_FETCH_IMAGES = True
|
||||||
|
|
||||||
|
#Set max gallery images here (respects _FETCH_IMAGES)
|
||||||
|
# -1 for infinite
|
||||||
|
_MAX_GALLERY_IMAGES = -1
|
||||||
|
|
||||||
|
|
||||||
#used for converting rating to stars
|
#used for converting rating to stars
|
||||||
_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
|
_STAR_URL = 'http://www.independent.co.uk/skins/ind/images/rating_star.png'
|
||||||
@ -41,6 +45,7 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
|
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
|
||||||
dict(attrs={'class' : ['autoplay','openBiogPopup']}),
|
dict(attrs={'class' : ['autoplay','openBiogPopup']}),
|
||||||
dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
|
dict(name='img',attrs={'alt' : ['Get Adobe Flash player']}),
|
||||||
|
dict(name='img',attrs={'alt' : ['view gallery']}),
|
||||||
dict(attrs={'style' : re.compile('.*')}),
|
dict(attrs={'style' : re.compile('.*')}),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -171,25 +176,43 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
|
for item in element.findAll('a',attrs={'href' : re.compile('.*')}):
|
||||||
if item.img is not None:
|
if item.img is not None:
|
||||||
#use full size image
|
#use full size image
|
||||||
|
images = []
|
||||||
|
|
||||||
img = item.findNext('img')
|
img = item.findNext('img')
|
||||||
|
|
||||||
img['src'] = item['href']
|
if not '?action=gallery' in item['href']:
|
||||||
|
img['src'] = item['href']
|
||||||
#insert caption if available
|
|
||||||
if img.get('title') and (len(img['title']) > 1):
|
|
||||||
tag = Tag(soup,'h3')
|
tag = Tag(soup,'h3')
|
||||||
text = NavigableString(img['title'])
|
text = ''
|
||||||
|
try:
|
||||||
|
text = img['data-title']
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if img.get('title') and (len(img['title']) > 1):
|
||||||
|
text = NavigableString(img['title'])
|
||||||
tag.insert(0,text)
|
tag.insert(0,text)
|
||||||
|
images.append((img, tag))
|
||||||
#picture before text
|
else:
|
||||||
|
gallery_images, remove_link = self._get_gallery_images(item['href'])
|
||||||
|
images = images + gallery_images
|
||||||
|
if remove_link:
|
||||||
|
gal_link = soup.find('a',attrs={'id' : 'view-gallery'})
|
||||||
|
if gal_link:
|
||||||
|
gal_link.extract()
|
||||||
img.extract()
|
img.extract()
|
||||||
item.insert(0,img)
|
for (img, title) in images:
|
||||||
item.insert(1,tag)
|
#insert caption if available
|
||||||
|
if title:
|
||||||
|
#picture before text
|
||||||
|
img.extract()
|
||||||
|
item.insert(0,img)
|
||||||
|
item.insert(1,title)
|
||||||
|
|
||||||
# remove link
|
# remove link
|
||||||
item.name = "div"
|
item.name = "div"
|
||||||
item["class"]='image'
|
item["class"]='image'
|
||||||
del item["href"]
|
del item["href"]
|
||||||
|
|
||||||
|
|
||||||
#remove empty subtitles
|
#remove empty subtitles
|
||||||
@ -324,6 +347,44 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def _get_gallery_images(self,url):
|
||||||
|
gallery_soup = self.index_to_soup(url)
|
||||||
|
images = []
|
||||||
|
remove_link = True
|
||||||
|
total = 1
|
||||||
|
try:
|
||||||
|
counter = gallery_soup.find('div',attrs={'id' : ['counter']})
|
||||||
|
total = counter.contents[0].split('/')
|
||||||
|
total = int(total[1].rstrip())
|
||||||
|
except:
|
||||||
|
total = 1
|
||||||
|
|
||||||
|
if self._MAX_GALLERY_IMAGES >= 0 and total > self._MAX_GALLERY_IMAGES:
|
||||||
|
total = self._MAX_GALLERY_IMAGES
|
||||||
|
remove_link = False
|
||||||
|
|
||||||
|
for i in range(1, total +1):
|
||||||
|
image, title = self._get_image_from_gallery(gallery_soup)
|
||||||
|
if image:
|
||||||
|
images.append((image,title))
|
||||||
|
next = url + '&ino=' + str(i + 1)
|
||||||
|
gallery_soup = self.index_to_soup(next)
|
||||||
|
images.reverse()
|
||||||
|
return images, remove_link
|
||||||
|
|
||||||
|
def _get_image_from_gallery(self,soup):
|
||||||
|
try:
|
||||||
|
container = soup.find('div',attrs={'id' : ['main-image']})
|
||||||
|
image = container.find('img')
|
||||||
|
if image:
|
||||||
|
title = soup.find('div',attrs={'id' : ['image-title']})
|
||||||
|
return image, title
|
||||||
|
except:
|
||||||
|
print 'error fetching gallery image'
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _recurisvely_linearise_tag_tree(
|
def _recurisvely_linearise_tag_tree(
|
||||||
self,
|
self,
|
||||||
item,
|
item,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user