diff --git a/recipes/heise.recipe b/recipes/heise.recipe index ba93ea96ce..a8e41916a4 100644 --- a/recipes/heise.recipe +++ b/recipes/heise.recipe @@ -45,30 +45,21 @@ class heiseDe(BasicNewsRecipe): } ''' - remove_tags = [dict(id='navi_top'), - dict(id='navi_bottom'), - dict(id='logo'), - dict(id='login_suche'), + remove_tags = [dict(id='navi_bottom'), + dict(id='software_tabbox'), + dict(id='heisetopnavi_sub_container'), dict(id='navi_login'), - dict(id='navigation'), - dict(id='breadcrumb'), - dict(id='adblockerwarnung'), - dict(id=''), - dict(id='sitemap'), - dict(id='bannerzone'), - dict(name='span', attrs={'class':'rsaquo'}), + dict(id='heisetopnavi_search'), + dict(id='themen_aktuell'), + dict(id='navi_top'), + dict(id='logo_bereich'), dict(name='div', attrs={'class':'news_logo'}), - dict(name='div', attrs={'class':'bcadv ISI_IGNORE'}), - dict(name='div', attrs={'class':'navi_top_container'}), - dict(name='p', attrs={'class':'news_option'}), - dict(name='p', attrs={'class':'news_navi'}), - dict(name='div', attrs={'class':'news_foren'})] + dict(name='div', attrs={'class':'heisetopnavi_header'}), + dict(name='p', attrs={'class':'teasermitbildundtext'}), + dict(name='div', attrs={'class':'news_foren'}), + dict(name='p', attrs={'class':'news_navi'})] + remove_tags_after = [dict(name='div', attrs={'class':'news_foren'})] feeds = [ ('heise', 'http://www.heise.de/newsticker/heise.rdf') ] - - - - - diff --git a/recipes/hna.recipe b/recipes/hna.recipe index e3349f0c7b..e5dd47f20a 100644 --- a/recipes/hna.recipe +++ b/recipes/hna.recipe @@ -15,12 +15,12 @@ class hnaDe(BasicNewsRecipe): __author__ = 'Oliver Niesner' use_embedded_content = False language = 'de' - use_embedded_content = False timefmt = ' [%d %b %Y]' max_articles_per_feed = 40 no_stylesheets = True remove_javascript = True + auto_cleanup = True encoding = 'utf-8' remove_tags = [dict(id='topnav'), @@ -30,23 +30,44 @@ class hnaDe(BasicNewsRecipe): dict(id='superbanner'), dict(id='navigation'), dict(id='skyscraper'), - dict(id='idNavigationWrap'), dict(id='idHeaderSearchForm'), + dict(id='idHeaderSearchBar'), dict(id='idLoginBarWrap'), dict(id='idAccountButtons'), dict(id='idHeadButtons'), dict(id='idBoxesWrap'), + dict(id='idJSMainNavigation'), dict(id=''), dict(name='span'), dict(name='ul', attrs={'class':'linklist'}), + dict(name='ul', attrs={'class':'idMainNavi idJSActive idHeadHomeBtn'}), + dict(name='ul', attrs={'class':'idHiddenNavi idNaviSubcategories'}), dict(name='a', attrs={'href':'#'}), + dict(name='a', attrs={'class':'idImgLink'}), + dict(name='a', attrs={'class':'idListLink'}), dict(name='div', attrs={'class':'hlist'}), + dict(name='div', attrs={'class':'idTabWrap'}), dict(name='li', attrs={'class':'idButton idIsLoginGroup idHeaderRegister '}), dict(name='li', attrs={'class':'idVideoBar idFirst'}), dict(name='li', attrs={'class':'idSetStartPageLink idLast'}), dict(name='li', attrs={'class':'idKinderNetzBar idLast'}), dict(name='li', attrs={'class':'idFotoBar '}), dict(name='div', attrs={'class':'subc noprint'}), + dict(name='div', attrs={'class':'idTxtLay'}), + dict(name='div', attrs={'class':'idLay idClStandard idStaticHtml'}), + dict(name='div', attrs={'class':'idHeaderWrap'}), + dict(name='div', attrs={'class':'idLay idRss idClStandard'}), + #dict(name='div', attrs={'class':' idHide idChannelChooser idJSComponent idParam-component-IDChannelChooser'}), + dict(name='div', attrs={'class':'idLay idClStandard idLeadStoriesFocus idLeadStoriesFocusOverlay '}), + dict(name='div', attrs={'class':'idTeaserLay idTeaserFloat idMediaLeft idLast'}), + dict(name='div', attrs={'class':'idHeaderButtons idAccountButtons'}), + dict(name='div', attrs={'class':'idTeaserLay idTeaserWithImg idSize4 idMediaLeft'}), + dict(name='div', attrs={'class':'idHeaderButtons idHeadButtons'}), + dict(name='div', attrs={'class':'idHeaderButtons idSetStartPage'}), + dict(name='div', attrs={'class':'idLay idClHl idTeaserList '}), + dict(name='div', attrs={'class':'idNavigationWrap'}), + dict(name='div', attrs={'class':'idBreadcrumbWrap'}), + dict(name='div', attrs={'class':'idBoxesWrap'}), dict(name='div', attrs={'class':'idBreadcrumb'}), dict(name='div', attrs={'class':'idLay idAdvertising idClStandard '}), dict(name='span', attrs={'class':'idHeadLineIntro'}), @@ -55,9 +76,12 @@ class hnaDe(BasicNewsRecipe): dict(name='p', attrs={'class':'h5'}), dict(name='p', attrs={'class':'idMoreEnd'})] #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})] - remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})] + #remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})] + remove_tags_after = [dict(name='div', attrs={'class':'idTxtLay idStaticHtmlIEHelper'})] feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), - ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ] + ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel'), + ('hna_KSV', 'http://feeds2.feedburner.com/hna/ksv'), + ('hna_kultur', 'http://feeds2.feedburner.com/hna/kultur') ]