mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Straits Times
This commit is contained in:
parent
aabe59dd54
commit
da6c7c6c3c
@ -5,10 +5,15 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
www.straitstimes.com
|
www.straitstimes.com
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import re
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(attrs={
|
||||||
|
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
class StraitsTimes(BasicNewsRecipe):
|
class StraitsTimes(BasicNewsRecipe):
|
||||||
title = 'The Straits Times'
|
title = 'The Straits Times'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -25,23 +30,14 @@ class StraitsTimes(BasicNewsRecipe):
|
|||||||
conversion_options = {
|
conversion_options = {
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||||
}
|
}
|
||||||
|
keep_only_tags = [
|
||||||
preprocess_regexps = [
|
classes('node-header node-subheadline group-byline-info group-updated-timestamp group-image-frame field-name-body')
|
||||||
(re.compile(
|
|
||||||
r'<meta name="description" content="[^"]+"\s*/?>',
|
|
||||||
re.IGNORECASE | re.DOTALL),
|
|
||||||
lambda m:''),
|
|
||||||
(re.compile(r'<!--.+?-->', re.IGNORECASE | re.DOTALL),
|
|
||||||
lambda m: ''),
|
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object', 'link', 'map', 'style']),
|
classes('st_telegram_boilerplate'),
|
||||||
dict(attrs={'class': 'st2014-realted-links'}),
|
dict(name='source'),
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'class': 'story'})]
|
|
||||||
remove_tags_after = dict(name='div', attrs={'class': 'hr_thin'})
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Top of the News' , u'http://www.straitstimes.com/print/top-of-the-news/rss.xml')
|
(u'Top of the News' , u'http://www.straitstimes.com/print/top-of-the-news/rss.xml')
|
||||||
,(u'World' , u'http://www.straitstimes.com/print/world/rss.xml')
|
,(u'World' , u'http://www.straitstimes.com/print/world/rss.xml')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user