mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update LA Times
This commit is contained in:
parent
80fd071521
commit
4375a580dc
@ -8,6 +8,15 @@ from pprint import pformat
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(
|
||||||
|
attrs={
|
||||||
|
'class': lambda x: x and frozenset(x.split()).intersection(q)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def absurl(url):
|
def absurl(url):
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.latimes.com' + url
|
url = 'http://www.latimes.com' + url
|
||||||
@ -30,13 +39,21 @@ class LATimes(BasicNewsRecipe):
|
|||||||
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
|
cover_url = 'http://www.latimes.com/includes/sectionfronts/A1.pdf'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(itemprop='articleBody'),
|
|
||||||
dict(name='h1'),
|
dict(name='h1'),
|
||||||
dict(attrs={'data-content-type': 'image'}),
|
dict(attrs={
|
||||||
|
'class': 'trb_ar_main'
|
||||||
|
}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
remove_tags_after = [
|
||||||
|
dict(itemprop='articleBody'),
|
||||||
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'data-content-type': 'story'}),
|
dict(attrs={
|
||||||
dict(attrs={'data-load-type': 'commentFrame'}),
|
'data-content-type': 'blurb'
|
||||||
|
}),
|
||||||
|
classes('trb_ar_cont trb_gptAd trb_filmstrip trb_ar_sponsoredmod'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
@ -49,7 +66,11 @@ class LATimes(BasicNewsRecipe):
|
|||||||
'data-content-slug': True,
|
'data-content-slug': True,
|
||||||
}
|
}
|
||||||
):
|
):
|
||||||
a = x.find('a', attrs={'class': lambda x: not x or 'SectionHeading' not in x})
|
a = x.find(
|
||||||
|
'a', attrs={
|
||||||
|
'class': lambda x: not x or 'SectionHeading' not in x
|
||||||
|
}
|
||||||
|
)
|
||||||
if a is not None:
|
if a is not None:
|
||||||
url = absurl(a['href'])
|
url = absurl(a['href'])
|
||||||
section = x['data-content-section'].capitalize()
|
section = x['data-content-section'].capitalize()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user