NOTE(review): reconstructed from a flattened, HTML-rendered copy of this patch.
Line breaks, leading whitespace on context lines, and the markup inside the two
pattern/replacement literals of the first hunk (<p>, <h[0-5]?>, &nbsp;, <br />)
were restored by hand; confirm against the upstream commit before applying.
The recipe hunk gained docstrings and comments, so its length is now +1,40 and
the blob id on its index line no longer describes the content.

diff --git a/src/calibre/ebooks/lrf/html/convert_from.py b/src/calibre/ebooks/lrf/html/convert_from.py
index 2bd63d1d8f..c72bcfbfe5 100644
--- a/src/calibre/ebooks/lrf/html/convert_from.py
+++ b/src/calibre/ebooks/lrf/html/convert_from.py
@@ -99,6 +99,10 @@ class HTMLConverter(object, LoggingInterface):
 
                   # Replace common line break patterns with line breaks
                   (re.compile(r'<p>(&nbsp;|\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
+                  # Replace empty headers with line breaks
+                  (re.compile(r'<h[0-5]?>(&nbsp;|\s)*</h[0-5]?>',
+                              re.IGNORECASE), lambda m: '<br />'),
+
                   # Replace entities
                   (re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
                                                      exceptions=['lt', 'gt', 'amp'])),
diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index b2c18b26a8..793d5cf45d 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in (
            'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
            'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
            'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
-           'el_universal', 'mediapart', 'wikinews_en', 'ecogeek',
+           'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
            )]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_daily_mail.py b/src/calibre/web/feeds/recipes/recipe_daily_mail.py
new file mode 100644
index 0000000000..c64e328bf2
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_daily_mail.py
@@ -0,0 +1,40 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class TheDailyMail(BasicNewsRecipe):
+    """News recipe for The Daily Mail, fed by the section RSS feeds in ``feeds``."""
+
+    title = u'The Daily Mail'
+    oldest_article = 2
+    # `_` is neither defined nor imported here; presumably a calibre builtin -- confirm.
+    language = _('English')
+    author = 'RufusA'
+    simultaneous_downloads = 1
+    max_articles_per_feed = 50
+
+    extra_css = 'h1 {text-align: left;}'
+
+    # Tag selectors; presumably read by BasicNewsRecipe to trim article markup -- confirm.
+    remove_tags = [dict(name='ul', attrs={'class': 'article-icons-links'})]
+    remove_tags_after = dict(name='h3', attrs={'class': 'social-links-title'})
+    remove_tags_before = dict(name='div', attrs={'id': 'content'})
+    no_stylesheets = True
+
+    feeds = [
+        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
+        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
+        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
+        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
+        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
+        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
+        (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'),
+        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
+        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
+        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
+        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss'),
+    ]
+
+    def print_version(self, url):
+        """Return ``url`` up to its first '?' with '?printingPage=true' appended."""
+        base = url.partition('?')[0]
+        return base + '?printingPage=true'