Update HNA

This commit is contained in:
Kovid Goyal 2022-12-27 11:01:56 +05:30
parent c544efa75a
commit 478f910611
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -5,8 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch Hessische/Niedersächsische Allgemeine (HNA).
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.web.feeds.news import BasicNewsRecipe, classes
class hnaDe(BasicNewsRecipe):
@ -20,78 +19,28 @@ class hnaDe(BasicNewsRecipe):
# Scalar recipe settings. Presumably these are read by the BasicNewsRecipe
# base class -- confirm each one against calibre's recipe API documentation.
max_articles_per_feed = 40
no_stylesheets = True
remove_javascript = True
auto_cleanup = True
encoding = 'utf-8'
# URL of the HNA logo (an SVG file), stored as the masthead URL.
masthead_url = 'https://idcdn.de/west/assets/hna-de/img/logo--cf5324e1.svg'
# Element specs listed for removal (presumably applied by BasicNewsRecipe to
# each downloaded page -- confirm against calibre's recipe API docs).
# The list is assembled in three runs, in this order:
#   1. elements selected by id,
#   2. every <span>,
#   3. elements selected by tag name plus a single attribute value.
# Trailing spaces inside some class strings are intentional: they are
# reproduced exactly as they were written in the hand-maintained list.
remove_tags = [
    dict(id=element_id)
    for element_id in (
        'topnav',
        'nav_main',
        'teaser',
        'suchen',
        'superbanner',
        'navigation',
        'skyscraper',
        'idHeaderSearchForm',
        'idHeaderSearchBar',
        'idLoginBarWrap',
        'idAccountButtons',
        'idHeadButtons',
        'idBoxesWrap',
        'idJSMainNavigation',
        '',
    )
] + [
    dict(name='span'),
] + [
    dict(name=tag, attrs={attribute: value})
    for tag, attribute, value in (
        ('ul', 'class', 'linklist'),
        ('ul', 'class', 'idMainNavi idJSActive idHeadHomeBtn'),
        ('ul', 'class', 'idHiddenNavi idNaviSubcategories'),
        ('a', 'href', '#'),
        ('a', 'class', 'idImgLink'),
        ('a', 'class', 'idListLink'),
        ('div', 'class', 'hlist'),
        ('div', 'class', 'idTabWrap'),
        ('li', 'class', 'idButton idIsLoginGroup idHeaderRegister '),
        ('li', 'class', 'idVideoBar idFirst'),
        ('li', 'class', 'idSetStartPageLink idLast'),
        ('li', 'class', 'idKinderNetzBar idLast'),
        ('li', 'class', 'idFotoBar '),
        ('div', 'class', 'subc noprint'),
        ('div', 'class', 'idTxtLay'),
        ('div', 'class', 'idLay idClStandard idStaticHtml'),
        ('div', 'class', 'idHeaderWrap'),
        ('div', 'class', 'idLay idRss idClStandard'),
        ('div', 'class', 'idLay idClStandard idLeadStoriesFocus idLeadStoriesFocusOverlay '),
        ('div', 'class', 'idTeaserLay idTeaserFloat idMediaLeft idLast'),
        ('div', 'class', 'idHeaderButtons idAccountButtons'),
        ('div', 'class', 'idTeaserLay idTeaserWithImg idSize4 idMediaLeft'),
        ('div', 'class', 'idHeaderButtons idHeadButtons'),
        ('div', 'class', 'idHeaderButtons idSetStartPage'),
        ('div', 'class', 'idLay idClHl idTeaserList '),
        ('div', 'class', 'idNavigationWrap'),
        ('div', 'class', 'idBreadcrumbWrap'),
        ('div', 'class', 'idBoxesWrap'),
        ('div', 'class', 'idBreadcrumb'),
        ('div', 'class', 'idLay idAdvertising idClStandard '),
        ('span', 'class', 'idHeadLineIntro'),
        ('p', 'class', 'breadcrumb'),
        ('a', 'style', 'cursor:hand'),
        ('p', 'class', 'h5'),
        ('p', 'class', 'idMoreEnd'),
    )
]
# Single spec naming the <div> with class 'idTxtLay idStaticHtmlIEHelper'
# (presumably everything after it is dropped -- confirm in calibre's docs).
remove_tags_after = [
    {'name': 'div', 'attrs': {'class': 'idTxtLay idStaticHtmlIEHelper'}},
]
def get_cover_url(self):
    """Return the cover image URL taken from the HNA e-paper landing page.

    Fetches https://epaper.meinehna.de/ via ``self.index_to_soup``, looks up
    the first ``<a class="edition-cover__link">`` and, inside it, the first
    ``<img>`` that has a ``src`` attribute.

    Returns the value of that ``src`` attribute, or ``None`` when either the
    link or the image is not found.
    """
    page = self.index_to_soup('https://epaper.meinehna.de/')

    cover_link = page.find('a', attrs={'class': 'edition-cover__link'})
    if not cover_link:
        return None

    cover_image = cover_link.find('img', src=True)
    if not cover_image:
        return None

    return cover_image['src']
# Feed list as (title, URL) pairs, built from an ordered title -> URL mapping
# so each feed reads as one entry; dict insertion order keeps the sequence.
feeds = list({
    'hna_soehre': 'http://feeds2.feedburner.com/hna/soehre',
    'hna_kassel': 'http://feeds2.feedburner.com/hna/kassel',
    'hna_KSV': 'http://feeds2.feedburner.com/hna/ksv',
    'hna_kultur': 'http://feeds2.feedburner.com/hna/kultur',
}.items())
# Single spec selecting <article> elements whose class attribute contains the
# exact whitespace-separated token 'id-Story'. Splitting the value makes the
# match token-exact (so 'id-StoryElement' alone does not qualify), and the
# leading `css and ...` short-circuits when the class value is missing/empty.
keep_only_tags = [
    {
        'name': 'article',
        'attrs': {
            'class': lambda css: css and 'id-Story' in css.split(),
        },
    },
]
# One spec built by the `classes` helper from a space-separated string of
# CSS class names (presumably it matches elements carrying any of them --
# confirm against calibre's `classes` helper). Names are listed one per line
# and joined with single spaces.
remove_tags = [
    classes(' '.join((
        'id-DonaldBreadcrumb',
        'id-StoryElement-interactionBar',
        'id-Recommendation',
        'id-Comments',
        'id-Comments--targetHelper',
        'id-StoryElement-inArticleReco',
    )))
]
# Feed list as (title, URL) pairs, built from an ordered title -> URL mapping
# so each feed reads as one entry; dict insertion order keeps the sequence.
feeds = list({
    'hna_soehre': 'http://feeds2.feedburner.com/hna/soehre',
    'hna_kassel': 'http://feeds2.feedburner.com/hna/kassel',
    'hna_KSV': 'http://feeds2.feedburner.com/hna/ksv',
    'hna_kultur': 'http://feeds2.feedburner.com/hna/kultur',
}.items())