mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-07 09:01:38 -04:00
Fix TSN and St. Louis Post Dispatch
This commit is contained in:
parent
d52a659da2
commit
e0d0eb1973
@ -7,12 +7,16 @@ class AdvancedUserRecipe1282093204(BasicNewsRecipe):
|
|||||||
|
|
||||||
oldest_article = 1
|
oldest_article = 1
|
||||||
max_articles_per_feed = 15
|
max_articles_per_feed = 15
|
||||||
|
use_embedded_content = False
|
||||||
|
|
||||||
|
no_stylesheets = True
|
||||||
|
auto_cleanup = True
|
||||||
masthead_url = 'http://farm5.static.flickr.com/4118/4929686950_0e22e2c88a.jpg'
|
masthead_url = 'http://farm5.static.flickr.com/4118/4929686950_0e22e2c88a.jpg'
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'News-Bill McClellan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fbill-mclellan&f=rss&t=article'),
|
(u'News-Bill McClellan', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fbill-mclellan&f=rss&t=article'),
|
||||||
(u'News-Columns', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcolumns*&l=50&f=rss&t=article'),
|
(u'News-Columns', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcolumns*&l=50&f=rss&t=article'),
|
||||||
(u'News-Crime & Courtshttp://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcrime-and-courts&l=50&f=rss&t=article'),
|
(u'News-Crime & Courts', 'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2Fcrime-and-courts&l=50&f=rss&t=article'),
|
||||||
(u'News-Deb Peterson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fdeb-peterson&f=rss&t=article'),
|
(u'News-Deb Peterson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fcolumns%2Fdeb-peterson&f=rss&t=article'),
|
||||||
(u'News-Education', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2feducation&f=rss&t=article'),
|
(u'News-Education', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2feducation&f=rss&t=article'),
|
||||||
(u'News-Government & Politics', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fgovt-and-politics&f=rss&t=article'),
|
(u'News-Government & Politics', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=news%2Flocal%2fgovt-and-politics&f=rss&t=article'),
|
||||||
@ -62,9 +66,9 @@ class AdvancedUserRecipe1282093204(BasicNewsRecipe):
|
|||||||
(u'Entertainment-House-O-Fun', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fhouse-o-fun&l=100&f=rss&t=article'),
|
(u'Entertainment-House-O-Fun', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fhouse-o-fun&l=100&f=rss&t=article'),
|
||||||
(u'Entertainment-Kevin C. Johnson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fmusic%2Fkevin-johnson&l=100&f=rss&t=article')
|
(u'Entertainment-Kevin C. Johnson', u'http://www2.stltoday.com/search/?q=&d1=&d2=&s=start_time&sd=desc&c=entertainment%2Fmusic%2Fkevin-johnson&l=100&f=rss&t=article')
|
||||||
]
|
]
|
||||||
remove_empty_feeds = True
|
#remove_empty_feeds = True
|
||||||
remove_tags = [dict(name='div', attrs={'id':'blox-logo'}),dict(name='a')]
|
#remove_tags = [dict(name='div', attrs={'id':'blox-logo'}),dict(name='a')]
|
||||||
keep_only_tags = [dict(name='h1'), dict(name='p', attrs={'class':'byline'}), dict(name="div", attrs={'id':'blox-story-text'})]
|
#keep_only_tags = [dict(name='h1'), dict(name='p', attrs={'class':'byline'}), dict(name="div", attrs={'id':'blox-story-text'})]
|
||||||
extra_css = 'p {text-align: left;}'
|
extra_css = 'p {text-align: left;}'
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,28 +7,15 @@ class AdvancedUserRecipe1289990851(BasicNewsRecipe):
|
|||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nexus'
|
__author__ = 'Nexus'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
auto_cleanup = True
|
||||||
|
use_embedded_content = False
|
||||||
INDEX = 'http://tsn.ca/nhl/story/?id=nhl'
|
INDEX = 'http://tsn.ca/nhl/story/?id=nhl'
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['tsnColWrap']}),
|
#keep_only_tags = [dict(name='div', attrs={'id':['tsnColWrap']}),
|
||||||
dict(name='div', attrs={'id':['tsnStory']})]
|
#dict(name='div', attrs={'id':['tsnStory']})]
|
||||||
remove_tags = [dict(name='div', attrs={'id':'tsnRelated'}),
|
#remove_tags = [dict(name='div', attrs={'id':'tsnRelated'}),
|
||||||
dict(name='div', attrs={'class':'textSize'})]
|
#dict(name='div', attrs={'class':'textSize'})]
|
||||||
|
|
||||||
def parse_index(self):
|
|
||||||
feeds = []
|
|
||||||
soup = self.index_to_soup(self.INDEX)
|
|
||||||
feed_parts = soup.findAll('div', attrs={'class': 'feature'})
|
|
||||||
for feed_part in feed_parts:
|
|
||||||
articles = []
|
|
||||||
if not feed_part.h2:
|
|
||||||
continue
|
|
||||||
feed_title = feed_part.h2.string
|
|
||||||
article_parts = feed_part.findAll('a')
|
|
||||||
for article_part in article_parts:
|
|
||||||
article_title = article_part.string
|
|
||||||
article_date = ''
|
|
||||||
article_url = 'http://tsn.ca/' + article_part['href']
|
|
||||||
articles.append({'title': article_title, 'url': article_url, 'description':'', 'date':article_date})
|
|
||||||
if articles:
|
|
||||||
feeds.append((feed_title, articles))
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('News',
|
||||||
|
'http://www.tsn.ca/datafiles/rss/Stories.xml'),
|
||||||
|
]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user