diff --git a/recipes/daily_mirror.recipe b/recipes/daily_mirror.recipe index 5d4dbe3f4b..f0d28c72e7 100644 --- a/recipes/daily_mirror.recipe +++ b/recipes/daily_mirror.recipe @@ -1,10 +1,11 @@ from calibre.web.feeds.news import BasicNewsRecipe - +import re class AdvancedUserRecipe1306061239(BasicNewsRecipe): title = u'The Daily Mirror' description = 'News as provide by The Daily Mirror -UK' __author__ = 'Dave Asbury' + # last updated 30/10/11 language = 'en_GB' cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg' @@ -12,26 +13,30 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe): masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif' - oldest_article = 1 - max_articles_per_feed = 100 + oldest_article = 2 + max_articles_per_feed = 30 remove_empty_feeds = True remove_javascript = True no_stylesheets = True + extra_css = ''' + body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;} + ''' keep_only_tags = [ - dict(name='h1'), - dict(attrs={'class':['article-attr']}), - dict(name='div', attrs={'class' : [ 'article-body', 'crosshead']}) + dict(name='div',attrs={'id' : 'body-content'}) + ] - - ] + remove_tags_after = [dict (name='div',attrs={'class' : 'related'})] remove_tags = [ - dict(name='div', attrs={'class' : ['caption', 'article-resize']}), - dict( attrs={'class':'append-html'}) - ] - + dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}), + dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}), + dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}), + dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'}) + ] + preprocess_regexps = [ + (re.compile(r'