mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Delete windows_star.recipe
duplicate
This commit is contained in:
parent
04cb2ec6c6
commit
49934879c7
@ -1,105 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
'''
|
||||
www.canada.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class CanWestPaper(BasicNewsRecipe):
|
||||
|
||||
# un-comment the following three lines for the Windsor Star
|
||||
title = u'Windsor Star'
|
||||
url_prefix = 'http://www.windsorstar.com'
|
||||
description = u'News from Windsor, ON'
|
||||
|
||||
# un-comment the following three lines for the Ottawa Citizen
|
||||
# # title = u'Ottawa Citizen'
|
||||
# # url_prefix = 'http://www.ottawacitizen.com'
|
||||
# # description = u'News from Ottawa, ON'
|
||||
|
||||
# un-comment the following three lines for the Montreal Gazette
|
||||
# # title = u'Montreal Gazette'
|
||||
# # url_prefix = 'http://www.montrealgazette.com'
|
||||
# # description = u'News from Montreal, QC'
|
||||
|
||||
language = 'en_CA'
|
||||
__author__ = 'Nick Redding'
|
||||
no_stylesheets = True
|
||||
timefmt = ' [%b %d]'
|
||||
extra_css = '''
|
||||
.timestamp { font-size:xx-small; display: block; }
|
||||
#storyheader { font-size: medium; }
|
||||
#storyheader h1 { font-size: x-large; }
|
||||
#storyheader h2 { font-size: large; font-style: italic; }
|
||||
.byline { font-size:xx-small; }
|
||||
#photocaption { font-size: small; font-style: italic }
|
||||
#photocredit { font-size: xx-small; }'''
|
||||
keep_only_tags = [dict(name='div', attrs={'id': 'storyheader'}), dict(
|
||||
name='div', attrs={'id': 'storycontent'})]
|
||||
remove_tags = [{'class': 'comments'},
|
||||
dict(name='div', attrs={'class': 'navbar'}), dict(
|
||||
name='div', attrs={'class': 'morelinks'}),
|
||||
dict(name='div', attrs={'class': 'viewmore'}), dict(
|
||||
name='li', attrs={'class': 'email'}),
|
||||
dict(name='div', attrs={'class': 'story_tool_hr'}), dict(
|
||||
name='div', attrs={'class': 'clear'}),
|
||||
dict(name='div', attrs={'class': 'story_tool'}), dict(
|
||||
name='div', attrs={'class': 'copyright'}),
|
||||
dict(name='div', attrs={'class': 'rule_grey_solid'}),
|
||||
dict(name='li', attrs={'class': 'print'}), dict(name='li', attrs={'class': 'share'}), dict(name='ul', attrs={'class': 'bullet'})]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
# delete iempty id attributes--they screw up the TOC for unknown reasons
|
||||
divtags = soup.findAll('div', attrs={'id': ''})
|
||||
if divtags:
|
||||
for div in divtags:
|
||||
del div['id']
|
||||
return soup
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(
|
||||
self.url_prefix + '/news/todays-paper/index.html')
|
||||
|
||||
articles = {}
|
||||
key = 'News'
|
||||
ans = ['News']
|
||||
|
||||
# Find each instance of class="sectiontitle", class="featurecontent"
|
||||
for divtag in soup.findAll('div', attrs={'class': ['section_title02', 'featurecontent']}):
|
||||
if 'section_title' in ''.join(divtag['class']):
|
||||
# div contains section title
|
||||
if not divtag.h3:
|
||||
continue
|
||||
key = self.tag_to_string(divtag.h3, False)
|
||||
ans.append(key)
|
||||
self.log('Section name %s' % key)
|
||||
continue
|
||||
# div contains article data
|
||||
h1tag = divtag.find('h1')
|
||||
if not h1tag:
|
||||
continue
|
||||
atag = h1tag.find('a', href=True)
|
||||
if not atag:
|
||||
continue
|
||||
url = self.url_prefix + '/news/todays-paper/' + atag['href']
|
||||
title = self.tag_to_string(atag, False)
|
||||
pubdate = ''
|
||||
description = ''
|
||||
ptag = divtag.find('p')
|
||||
if ptag:
|
||||
description = self.tag_to_string(ptag, False)
|
||||
author = ''
|
||||
autag = divtag.find('h4')
|
||||
if autag:
|
||||
author = self.tag_to_string(autag, False)
|
||||
if key not in articles:
|
||||
articles[key] = []
|
||||
articles[key].append(dict(title=title, url=url, date=pubdate,
|
||||
description=description, author=author, content=''))
|
||||
|
||||
ans = [(keyl, articles[key]) for keyl in ans if keyl in articles]
|
||||
return ans
|
Loading…
x
Reference in New Issue
Block a user