calibre/recipes/daily_mirror.recipe
2011-10-30 22:20:33 +05:30

58 lines
2.3 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1306061239(BasicNewsRecipe):
title = u'The Daily Mirror'
description = 'News as provide by The Daily Mirror -UK'
__author__ = 'Dave Asbury'
# last updated 30/10/11
language = 'en_GB'
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
oldest_article = 2
max_articles_per_feed = 30
remove_empty_feeds = True
remove_javascript = True
no_stylesheets = True
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
keep_only_tags = [
dict(name='div',attrs={'id' : 'body-content'})
]
remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
remove_tags = [
dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
]
preprocess_regexps = [
(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
feeds = [
(u'News', u'http://www.mirror.co.uk/news/rss.xml')
,(u'Tech News', u'http://www.mirror.co.uk/news/technology/rss.xml')
,(u'Weird World','http://www.mirror.co.uk/news/weird-world/rss.xml')
,(u'Film Gossip','http://www.mirror.co.uk/celebs/film/rss.xml')
,(u'Music News','http://www.mirror.co.uk/celebs/music/rss.xml')
,(u'Celebs and Tv Gossip','http://www.mirror.co.uk/celebs/tv/rss.xml')
,(u'Sport','http://www.mirror.co.uk/sport/rss.xml')
,(u'Life Style','http://www.mirror.co.uk/life-style/rss.xml')
,(u'Advice','http://www.mirror.co.uk/advice/rss.xml')
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
]