Improved versions of Sueddeutsche and HNA

This commit is contained in:
Kovid Goyal 2010-10-06 09:28:54 -06:00
parent e0e0b42f1f
commit 570eb48fb1
2 changed files with 24 additions and 12 deletions

View File

@ -30,21 +30,33 @@ class hnaDe(BasicNewsRecipe):
dict(id='superbanner'),
dict(id='navigation'),
dict(id='skyscraper'),
dict(id='idNavigationWrap'),
dict(id='idHeaderSearchForm'),
dict(id='idLoginBarWrap'),
dict(id='idAccountButtons'),
dict(id='idHeadButtons'),
dict(id='idBoxesWrap'),
dict(id=''),
dict(name='span'),
dict(name='ul', attrs={'class':'linklist'}),
dict(name='a', attrs={'href':'#'}),
dict(name='div', attrs={'class':'hlist'}),
dict(name='li', attrs={'class':'idButton idIsLoginGroup idHeaderRegister '}),
dict(name='li', attrs={'class':'idVideoBar idFirst'}),
dict(name='li', attrs={'class':'idSetStartPageLink idLast'}),
dict(name='li', attrs={'class':'idKinderNetzBar idLast'}),
dict(name='li', attrs={'class':'idFotoBar '}),
dict(name='div', attrs={'class':'subc noprint'}),
dict(name='div', attrs={'class':'idBreadcrumb'}),
dict(name='div', attrs={'class':'idLay idAdvertising idClStandard '}),
dict(name='span', attrs={'class':'idHeadLineIntro'}),
dict(name='p', attrs={'class':'breadcrumb'}),
dict(name='a', attrs={'style':'cursor:hand'}),
dict(name='p', attrs={'class':'h5'})]
dict(name='p', attrs={'class':'h5'}),
dict(name='p', attrs={'class':'idMoreEnd'})]
#remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
remove_tags_after = [dict(name='a', attrs={'href':'#'})]
remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})]
feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ]

View File

@ -28,7 +28,7 @@ class Sueddeutsche(BasicNewsRecipe):
"SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
"pages closed","basebox right narrow"]}),
"pages closed","basebox right narrow","headslot galleried"]}),
dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
"item","videoBigButton","articlefooter full-column",
@ -38,10 +38,11 @@ class Sueddeutsche(BasicNewsRecipe):
dict(name='div', attrs={'style':["position:relative;"]}),
dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav"]}),
dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
dict(name='td', attrs={'class':["artikelDruckenRight"]}),
dict(name='p', text = "ANZEIGE")
]
remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]
extra_css = '''
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
@ -70,9 +71,8 @@ class Sueddeutsche(BasicNewsRecipe):
(u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss')
]
def print_version(self, url):
return url.replace('/text/', '/text/print.html')
main, sep, id = url.rpartition('/')
return main + '/2.220/' + id