Fix #873526 (Quickfix recipe for new automatiseringgids website)

This commit is contained in:
Kovid Goyal 2011-10-14 04:37:49 +05:30
parent 6ee585ecc8
commit 46fcf6dbc0

View File

@@ -10,27 +10,15 @@ class autogids(BasicNewsRecipe):
publisher = 'AutomatiseringGids' publisher = 'AutomatiseringGids'
category = 'Nieuws, IT, Nederlandstalig' category = 'Nieuws, IT, Nederlandstalig'
simultaneous_downloads = 5 simultaneous_downloads = 5
#delay = 1 timefmt = ' [%a, %d %B, %Y]'
timefmt = ' [%A, %d %B, %Y]'
#timefmt = ''
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
publication_type = 'newspaper' publication_type = 'newspaper'
encoding = 'utf-8' encoding = 'utf-8'
cover_url = 'http://www.automatiseringgids.nl/siteimg/header_logo.gif' cover_url = 'http://www.automatiseringgids.nl/binaries/content/gallery/ag/marketing/ag-avatar-100x50.jpg'
keep_only_tags = [dict(id=['content'])] keep_only_tags = [dict(name='div', attrs={'class':['content']})]
extra_css = '.artikelheader {font-size:0.8em; color: #666;} .artikelintro {font-weight:bold} div.imgArticle {float: right; margin: 0 0em 1em 1em; display: block; position: relative; } \
h2 { margin: 0 0 0.5em; min-height: 30px; font-size: 1.5em; letter-spacing: -0.2px; margin: 0 0 0.5em; color: black; font-weight: bold; line-height: 1.2em; padding: 4px 3px 0; }'
remove_tags = [dict(name='div', attrs={'id':['loginbox','reactiecollapsible','reactiebox']}),
dict(name='div', attrs={'class':['column_a','column_c','bannerfullsize','reactieheader','reactiecollapsible','formulier','artikel_headeroptions']}),
dict(name='ul', attrs={'class':['highlightlist']}),
dict(name='input', attrs={'type':['button']}),
dict(name='div', attrs={'style':['display:block; width:428px; height:30px; float:left;']}),
]
preprocess_regexps = [ preprocess_regexps = [
(re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE), (re.compile(r'(<h3>Reacties</h3>|<h2>Zie ook:</h2>|<div style=".*</div>|<a[^>]*>|</a>)', re.DOTALL|re.IGNORECASE),
lambda match: ''), lambda match: ''),