The Workingham Times by DM. Fixes #7672 (New recipe for The Workingham Times)

This commit is contained in:
Kovid Goyal 2010-11-26 07:41:30 -07:00
parent ca3f9b841d
commit 64ede2a0ea
2 changed files with 59 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 1011 B

View File

@@ -0,0 +1,59 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.getwokingham.co.uk
'''
from calibre.web.feeds.recipes import BasicNewsRecipe
class TheWorkinghamTimes(BasicNewsRecipe):
    """Calibre news recipe for the getwokingham.co.uk RSS feeds.

    The class is purely declarative apart from ``preprocess_html``, which
    strips inline styles and flattens hyperlinks in the fetched article
    markup.
    """

    # NOTE(review): the title spells "Workingham" while ``publisher`` and the
    # feed URLs spell "Wokingham". The title is user-visible, so confirm the
    # intended spelling before changing it.
    title = 'The Workingham Times'
    __author__ = 'Darko Miletic'
    description = 'News from UK'

    # Download limits (see the calibre BasicNewsRecipe API for exact units).
    oldest_article = 2
    max_articles_per_feed = 100

    # Fetch/parse behaviour.
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf8'

    # Metadata; reused below in ``conversion_options``.
    publisher = 'The Wokingham Times - S&B media'
    category = 'news, UK, world'
    language = 'en_GB'
    publication_type = 'newsportal'

    # Stylesheet supplied by the recipe itself.
    extra_css = """
                    body{ font-family: Arial,sans-serif }
                    img{display: block; margin-bottom: 0.4em}
                """

    # Built from the class attributes above so the values stay in sync.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Keep only the article body container; then drop ads, embedded media,
    # head-level tags and image captions from what remains.
    keep_only_tags = [dict(name='div', attrs={'id': 'article-body'})]
    remove_tags = [
        dict(name='div', attrs={'class': ['ad']}),
        dict(name=['meta', 'base', 'iframe', 'embed', 'object']),
        dict(name='span', attrs={'class': 'caption small'}),
    ]
    remove_attributes = ['width', 'height', 'lang']

    # (section title, RSS URL) pairs.
    feeds = [
        ('Home', 'http://www.getwokingham.co.uk/rss.xml'),
        ('News', 'http://www.getwokingham.co.uk/news/rss.xml'),
        ('Entertainment', 'http://www.getwokingham.co.uk/entertainment/rss.xml'),
        ('Lifestyle', 'http://www.getwokingham.co.uk/lifestyle/rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Strip inline styles and neutralise hyperlinks in ``soup``.

        Every tag carrying a ``style`` attribute has that attribute deleted.
        Each ``<a>`` tag is then handled in one of two ways:

        * if its ``.string`` is not ``None``, the whole tag is replaced by
          that string;
        * otherwise the tag is renamed to ``<span>`` and its ``href``
          attribute is deleted, which keeps any nested markup.

        :param soup: parsed article document (a BeautifulSoup-style tree
            exposing ``findAll``; presumably supplied by calibre -- confirm
            against the BasicNewsRecipe API).
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        for item in soup.findAll('a'):
            # Named ``link_text`` rather than ``str`` so the builtin is not
            # shadowed inside this method.
            link_text = item.string
            if link_text is not None:
                item.replaceWith(link_text)
            else:
                item.name = 'span'
                del item['href']
        return soup