mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update HNA
This commit is contained in:
parent
c544efa75a
commit
478f910611
@ -5,8 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Fetch Hessisch Niedersachsische Allgemeine.
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||||
|
||||
class hnaDe(BasicNewsRecipe):
|
||||
|
||||
@ -20,78 +19,28 @@ class hnaDe(BasicNewsRecipe):
|
||||
max_articles_per_feed = 40
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
auto_cleanup = True
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://idcdn.de/west/assets/hna-de/img/logo--cf5324e1.svg'
|
||||
|
||||
remove_tags = [dict(id='topnav'),
|
||||
dict(id='nav_main'),
|
||||
dict(id='teaser'),
|
||||
dict(id='suchen'),
|
||||
dict(id='superbanner'),
|
||||
dict(id='navigation'),
|
||||
dict(id='skyscraper'),
|
||||
dict(id='idHeaderSearchForm'),
|
||||
dict(id='idHeaderSearchBar'),
|
||||
dict(id='idLoginBarWrap'),
|
||||
dict(id='idAccountButtons'),
|
||||
dict(id='idHeadButtons'),
|
||||
dict(id='idBoxesWrap'),
|
||||
dict(id='idJSMainNavigation'),
|
||||
dict(id=''),
|
||||
dict(name='span'),
|
||||
dict(name='ul', attrs={'class': 'linklist'}),
|
||||
dict(name='ul', attrs={
|
||||
'class': 'idMainNavi idJSActive idHeadHomeBtn'}),
|
||||
dict(name='ul', attrs={
|
||||
'class': 'idHiddenNavi idNaviSubcategories'}),
|
||||
dict(name='a', attrs={'href': '#'}),
|
||||
dict(name='a', attrs={'class': 'idImgLink'}),
|
||||
dict(name='a', attrs={'class': 'idListLink'}),
|
||||
dict(name='div', attrs={'class': 'hlist'}),
|
||||
dict(name='div', attrs={'class': 'idTabWrap'}),
|
||||
dict(name='li', attrs={
|
||||
'class': 'idButton idIsLoginGroup idHeaderRegister '}),
|
||||
dict(name='li', attrs={'class': 'idVideoBar idFirst'}),
|
||||
dict(name='li', attrs={
|
||||
'class': 'idSetStartPageLink idLast'}),
|
||||
dict(name='li', attrs={'class': 'idKinderNetzBar idLast'}),
|
||||
dict(name='li', attrs={'class': 'idFotoBar '}),
|
||||
dict(name='div', attrs={'class': 'subc noprint'}),
|
||||
dict(name='div', attrs={'class': 'idTxtLay'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idLay idClStandard idStaticHtml'}),
|
||||
dict(name='div', attrs={'class': 'idHeaderWrap'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idLay idRss idClStandard'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idLay idClStandard idLeadStoriesFocus idLeadStoriesFocusOverlay '}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idTeaserLay idTeaserFloat idMediaLeft idLast'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idHeaderButtons idAccountButtons'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idTeaserLay idTeaserWithImg idSize4 idMediaLeft'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idHeaderButtons idHeadButtons'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idHeaderButtons idSetStartPage'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idLay idClHl idTeaserList '}),
|
||||
dict(name='div', attrs={'class': 'idNavigationWrap'}),
|
||||
dict(name='div', attrs={'class': 'idBreadcrumbWrap'}),
|
||||
dict(name='div', attrs={'class': 'idBoxesWrap'}),
|
||||
dict(name='div', attrs={'class': 'idBreadcrumb'}),
|
||||
dict(name='div', attrs={
|
||||
'class': 'idLay idAdvertising idClStandard '}),
|
||||
dict(name='span', attrs={'class': 'idHeadLineIntro'}),
|
||||
dict(name='p', attrs={'class': 'breadcrumb'}),
|
||||
dict(name='a', attrs={'style': 'cursor:hand'}),
|
||||
dict(name='p', attrs={'class': 'h5'}),
|
||||
dict(name='p', attrs={'class': 'idMoreEnd'})]
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class': 'idTxtLay idStaticHtmlIEHelper'})]
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('https://epaper.meinehna.de/')
|
||||
if a := soup.find('a', attrs={'class':'edition-cover__link'}):
|
||||
if citem := a.find('img', src=True):
|
||||
return citem['src']
|
||||
|
||||
feeds = [('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
|
||||
('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel'),
|
||||
('hna_KSV', 'http://feeds2.feedburner.com/hna/ksv'),
|
||||
('hna_kultur', 'http://feeds2.feedburner.com/hna/kultur')]
|
||||
keep_only_tags = [
|
||||
dict(name='article', attrs={'class':lambda x: x and 'id-Story' in x.split()})
|
||||
]
|
||||
remove_tags = [
|
||||
classes(
|
||||
'id-DonaldBreadcrumb id-StoryElement-interactionBar id-Recommendation '
|
||||
'id-Comments id-Comments--targetHelper id-StoryElement-inArticleReco'
|
||||
)
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
|
||||
('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel'),
|
||||
('hna_KSV', 'http://feeds2.feedburner.com/hna/ksv'),
|
||||
('hna_kultur', 'http://feeds2.feedburner.com/hna/kultur')
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user