Update Associated Press

This commit is contained in:
Kovid Goyal 2018-11-13 09:54:00 +05:30
parent 9f24576ab3
commit 5a7275b976
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -7,6 +7,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import json import json
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag
def classes(classes): def classes(classes):
@ -28,11 +29,11 @@ class AssociatedPress(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
remove_empty_feeds = False remove_empty_feeds = False
keep_only_tags = [ keep_only_tags = [
classes('topTitle articleImage articleBody'), classes('CardHeadline LeadFeature Article'),
] ]
remove_tags = [ remove_tags = [
classes('ad-placeholder modalImageButton modalVideoButton'), classes('ad-placeholder modalImageButton modalVideoButton'),
dict(name='button'), dict(name=['button', 'svg']),
] ]
def parse_index(self): def parse_index(self):
@ -71,3 +72,11 @@ class AssociatedPress(BasicNewsRecipe):
articles.append({'title': title, 'url': url}) articles.append({'title': title, 'url': url})
self.log('') self.log('')
return articles return articles
def preprocess_html(self, soup, *a):
    """Insert a lead image at the top of each ``LeadFeature`` element.

    For every ``<meta name="twitter:image:alt">`` tag found in *soup*, a new
    ``<img>`` tag whose ``src`` is that meta tag's ``content`` attribute is
    inserted as the first child of every element matched by
    ``classes('LeadFeature')``.

    Meta tags with a missing or empty ``content`` attribute are skipped, so
    a malformed page neither raises ``KeyError`` nor gains an ``<img>`` with
    an empty ``src``.

    Any extra positional arguments (``*a``) are accepted and ignored.
    Returns the same *soup* object, modified in place.
    """
    for meta in soup.findAll('meta', attrs=dict(name="twitter:image:alt")):
        # Use .get() rather than indexing: a meta tag without a content
        # attribute must not abort preprocessing of the whole article.
        src = meta.get('content')
        if not src:
            continue
        for div in soup.findAll(**classes('LeadFeature')):
            img = Tag(soup, 'img')
            img['src'] = src
            # Position 0 places the image before the element's existing children.
            div.insert(0, img)
    return soup