mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Independent
This commit is contained in:
parent
23cbedd708
commit
b8dda93092
@ -1,5 +1,5 @@
|
|||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
class TheIndependentNew(BasicNewsRecipe):
|
class TheIndependentNew(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -20,17 +20,34 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
masthead_url = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
|
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
|
|
||||||
keep_only_tags = [dict(id='main')]
|
keep_only_tags = [
|
||||||
remove_tags = [
|
dict(itemprop=['articleBody', 'headline', 'contentUrl']),
|
||||||
dict(attrs={'class':['column-2', 'article-links', 'second-gallery', 'buttons']}),
|
dict(attrs={'class':['intro', 'author']}),
|
||||||
dict(attrs={'class':lambda x: x and 'share-tool-ctr' in x.split()}),
|
|
||||||
dict(id=lambda x: x and re.match(r'slideshow-\d+', x)),
|
|
||||||
dict(id=['anchor-href-comment', 'anchor-href-reply', 'commentReference']),
|
|
||||||
]
|
]
|
||||||
|
remove_tags = [
|
||||||
|
dict(attrs={'class':lambda x: x and 'show-all' in x.split()}),
|
||||||
|
dict(attrs={'data-scald-gallery':True}),
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_attributes = ['style']
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Attach gallery images to their legend entries and return the soup.

    For every element whose class is ``full-gallery``:

    1. Each ``<li>`` carrying both ``data-gallery-item`` and
       ``data-original`` is recorded (item key -> ``data-original`` value)
       and then removed from the tree.
    2. Each ``<li>`` carrying ``data-gallery-legend`` whose value matches a
       recorded item key gets a new ``<img>`` appended, with ``src`` set to
       the recorded ``data-original`` value.

    Legend entries with no matching item are left untouched. The same
    ``soup`` object that was passed in is modified in place and returned.
    """
    item_filter = {'data-gallery-item': True, 'data-original': True}
    legend_filter = {'data-gallery-legend': True}

    for gallery in soup.findAll(attrs={'class': 'full-gallery'}):
        # Map of gallery item key -> URL taken from data-original.
        originals = {}
        for item in gallery.findAll('li', attrs=item_filter):
            originals[item['data-gallery-item']] = item['data-original']
            # The item <li> itself is dropped; only its URL is kept.
            item.extract()

        for legend in gallery.findAll('li', attrs=legend_filter):
            image_url = originals.get(legend['data-gallery-legend'])
            if image_url is None:
                # No item was recorded for this legend; leave it as is.
                continue
            picture = Tag(soup, 'img')
            picture['src'] = image_url
            picture['style'] = 'display:block'
            legend.append(picture)

    return soup
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News - UK',
|
(u'News - UK',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user