mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update The Straits Times
This commit is contained in:
parent
aabe59dd54
commit
da6c7c6c3c
@ -5,10 +5,15 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
www.straitstimes.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a tag-matching spec keyed on the HTML ``class`` attribute.

    ``classes`` is a space-separated string of class names. The result is
    a dict of the form ``{'attrs': {'class': predicate}}``. The predicate
    receives a tag's raw ``class`` attribute value and returns:

    * the value itself when it is falsy (``None`` or an empty string), or
    * the frozenset of class names shared between the tag and the
      requested names (empty, hence falsy, when there is no overlap).
    """
    wanted = frozenset(classes.split(' '))

    def shares_class(value):
        # Pass falsy values straight through so the result mirrors a
        # short-circuiting ``value and ...`` expression.
        if not value:
            return value
        present = frozenset(value.split())
        return present & wanted

    return {'attrs': {'class': shares_class}}
|
||||
|
||||
|
||||
class StraitsTimes(BasicNewsRecipe):
|
||||
title = 'The Straits Times'
|
||||
__author__ = 'Darko Miletic'
|
||||
@ -25,23 +30,14 @@ class StraitsTimes(BasicNewsRecipe):
|
||||
conversion_options = {
|
||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
||||
}
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(
|
||||
r'<meta name="description" content="[^"]+"\s*/?>',
|
||||
re.IGNORECASE | re.DOTALL),
|
||||
lambda m:''),
|
||||
(re.compile(r'<!--.+?-->', re.IGNORECASE | re.DOTALL),
|
||||
lambda m: ''),
|
||||
keep_only_tags = [
|
||||
classes('node-header node-subheadline group-byline-info group-updated-timestamp group-image-frame field-name-body')
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name=['object', 'link', 'map', 'style']),
|
||||
dict(attrs={'class': 'st2014-realted-links'}),
|
||||
classes('st_telegram_boilerplate'),
|
||||
dict(name='source'),
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class': 'story'})]
|
||||
remove_tags_after = dict(name='div', attrs={'class': 'hr_thin'})
|
||||
|
||||
feeds = [
|
||||
(u'Top of the News' , u'http://www.straitstimes.com/print/top-of-the-news/rss.xml')
|
||||
,(u'World' , u'http://www.straitstimes.com/print/world/rss.xml')
|
||||
|
Loading…
x
Reference in New Issue
Block a user