mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New recipe for The Daily Mail UK by RufusA. Fix #998 (HTML2LRF and empty headings)
This commit is contained in:
parent
aedb2cf959
commit
8ea72440c8
@ -99,6 +99,10 @@ class HTMLConverter(object, LoggingInterface):
|
|||||||
# Replace common line break patterns with line breaks
|
# Replace common line break patterns with line breaks
|
||||||
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
(re.compile(r'<p>( |\s)*</p>', re.IGNORECASE), lambda m: '<br />'),
|
||||||
|
|
||||||
|
# Replace empty headers with line breaks
|
||||||
|
(re.compile(r'<h[0-5]?>( |\s)*</h[0-5]?>',
|
||||||
|
re.IGNORECASE), lambda m: '<br />'),
|
||||||
|
|
||||||
# Replace entities
|
# Replace entities
|
||||||
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
(re.compile(ur'&(\S+?);'), partial(entity_to_unicode,
|
||||||
exceptions=['lt', 'gt', 'amp'])),
|
exceptions=['lt', 'gt', 'amp'])),
|
||||||
|
@ -33,7 +33,7 @@ recipe_modules = ['recipe_' + r for r in (
|
|||||||
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
'la_republica', 'physics_today', 'chicago_tribune', 'e_novine',
|
||||||
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
|
||||||
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
|
'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
|
||||||
'el_universal', 'mediapart', 'wikinews_en', 'ecogeek',
|
'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
|
||||||
)]
|
)]
|
||||||
|
|
||||||
import re, imp, inspect, time, os
|
import re, imp, inspect, time, os
|
||||||
|
33
src/calibre/web/feeds/recipes/recipe_daily_mail.py
Normal file
33
src/calibre/web/feeds/recipes/recipe_daily_mail.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class TheDailyMail(BasicNewsRecipe):
    """Recipe configuration for The Daily Mail, built from its RSS feeds.

    All attributes below are plain class-level settings read by the
    BasicNewsRecipe machinery; the only custom logic is
    :meth:`print_version`.
    """

    # --- Identification ---------------------------------------------------
    title = u'The Daily Mail'
    author = 'RufusA'
    language = _('English')

    # --- Download limits --------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 50
    simultaneous_downloads = 1

    # --- Page clean-up ----------------------------------------------------
    # Site stylesheets are dropped; only the rule below is applied.
    no_stylesheets = True
    extra_css = 'h1 {text-align: left;}'

    # Markers delimiting the part of the page that is kept, plus the
    # icon/link list that is removed from inside that region.
    remove_tags_before = dict(name='div', attrs={'id': 'content'})
    remove_tags_after = dict(name='h3', attrs={'class': 'social-links-title'})
    remove_tags = [
        dict(name='ul', attrs={'class': 'article-icons-links'}),
    ]

    # --- Feeds: (section title, RSS URL) ----------------------------------
    feeds = [
        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
        (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'),
        (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'),
        (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'),
        (u'Health', u'http://www.dailymail.co.uk/health/index.rss'),
        (u'Science and Technology',
         u'http://www.dailymail.co.uk/sciencetech/index.rss'),
        (u'Money', u'http://www.dailymail.co.uk/money/index.rss'),
        (u'Property', u'http://www.dailymail.co.uk/property/index.rss'),
        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss'),
    ]

    def print_version(self, url):
        """Return the printer-friendly address for an article URL.

        Everything from the first ``?`` onward is discarded and the
        ``?printingPage=true`` query string is appended in its place.
        """
        # Keep only the part of the URL that precedes any query string.
        base_url = url.split('?', 1)[0]
        return base_url + '?printingPage=true'
|
Loading…
x
Reference in New Issue
Block a user