This commit is contained in:
Kovid Goyal 2012-08-17 09:47:27 +05:30
parent 036cea09d7
commit 29f58de5c0
8 changed files with 62 additions and 99 deletions

View File

@ -5,14 +5,9 @@ __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' ## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg' ## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
Kindle_Fire=False Kindle_Fire=False
masthead_url = std_logo_url masthead_url = std_logo_url
url_list = [] url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})] keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, datetime, date from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
def prepare_masthead_image(self, path_to_image, out_path): def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire: if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas from calibre.utils.magick import Image, create_canvas
img = Image() img = Image()
img.open(path_to_image) img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
div.insert(0,img) div.insert(0,img)
allpics.append(div) allpics.append(div)
pgall.replaceWith(allpics) pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}): for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract() pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
if 'GALLERY' in title.upper(): if 'GALLERY' in title.upper():
return return
if 'PHOTOS' in title.upper(): if 'PHOTOS' in title.upper():
return return
dtag = adiv.find('div','content') dtag = adiv.find('div','content')
description='' description=''
print("URL "+url) print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
parse_web_index(k,url) parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -5,14 +5,9 @@ __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
@ -23,7 +18,7 @@ class CanWestPaper(BasicNewsRecipe):
(u'Vancouver',u'/news/vancouver/index.html'), (u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'), (u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'), (u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),, (u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'), (u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'), (u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'), (u'Alberta',u'/news/alberta/index.html'),
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' ## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg' ## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
Kindle_Fire=False Kindle_Fire=False
masthead_url = std_logo_url masthead_url = std_logo_url
url_list = [] url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})] keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, datetime, date from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
def prepare_masthead_image(self, path_to_image, out_path): def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire: if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas from calibre.utils.magick import Image, create_canvas
img = Image() img = Image()
img.open(path_to_image) img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
div.insert(0,img) div.insert(0,img)
allpics.append(div) allpics.append(div)
pgall.replaceWith(allpics) pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}): for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract() pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
if 'GALLERY' in title.upper(): if 'GALLERY' in title.upper():
return return
if 'PHOTOS' in title.upper(): if 'PHOTOS' in title.upper():
return return
dtag = adiv.find('div','content') dtag = adiv.find('div','content')
description='' description=''
print("URL "+url) print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
parse_web_index(k,url) parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -5,14 +5,9 @@ __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' ## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg' ## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
Kindle_Fire=False Kindle_Fire=False
masthead_url = std_logo_url masthead_url = std_logo_url
url_list = [] url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})] keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, datetime, date from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
def prepare_masthead_image(self, path_to_image, out_path): def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire: if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas from calibre.utils.magick import Image, create_canvas
img = Image() img = Image()
img.open(path_to_image) img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
div.insert(0,img) div.insert(0,img)
allpics.append(div) allpics.append(div)
pgall.replaceWith(allpics) pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}): for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract() pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
if 'GALLERY' in title.upper(): if 'GALLERY' in title.upper():
return return
if 'PHOTOS' in title.upper(): if 'PHOTOS' in title.upper():
return return
dtag = adiv.find('div','content') dtag = adiv.find('div','content')
description='' description=''
print("URL "+url) print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
parse_web_index(k,url) parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -5,14 +5,9 @@ __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
title = u'Ottawa Citizen' title = u'Ottawa Citizen'
url_prefix = 'http://www.ottawacitizen.com' url_prefix = 'http://www.ottawacitizen.com'
description = u'News from Ottawa, ON' description = u'News from Ottawa, ON'
std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
logo_url = 'oclogo.jpg' logo_url = 'oclogo.jpg'
fp_tag = 'CAN_OC' fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
Kindle_Fire=False Kindle_Fire=False
masthead_url = std_logo_url masthead_url = std_logo_url
url_list = [] url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})] keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, datetime, date from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
def prepare_masthead_image(self, path_to_image, out_path): def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire: if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas from calibre.utils.magick import Image, create_canvas
img = Image() img = Image()
img.open(path_to_image) img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
div.insert(0,img) div.insert(0,img)
allpics.append(div) allpics.append(div)
pgall.replaceWith(allpics) pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}): for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract() pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
if 'GALLERY' in title.upper(): if 'GALLERY' in title.upper():
return return
if 'PHOTOS' in title.upper(): if 'PHOTOS' in title.upper():
return return
dtag = adiv.find('div','content') dtag = adiv.find('div','content')
description='' description=''
print("URL "+url) print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
parse_web_index(k,url) parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -5,14 +5,9 @@ __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' ## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg' ## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
Kindle_Fire=False Kindle_Fire=False
masthead_url = std_logo_url masthead_url = std_logo_url
url_list = [] url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})] keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, datetime, date from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
def prepare_masthead_image(self, path_to_image, out_path): def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire: if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas from calibre.utils.magick import Image, create_canvas
img = Image() img = Image()
img.open(path_to_image) img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
div.insert(0,img) div.insert(0,img)
allpics.append(div) allpics.append(div)
pgall.replaceWith(allpics) pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}): for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract() pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
if 'GALLERY' in title.upper(): if 'GALLERY' in title.upper():
return return
if 'PHOTOS' in title.upper(): if 'PHOTOS' in title.upper():
return return
dtag = adiv.find('div','content') dtag = adiv.find('div','content')
description='' description=''
print("URL "+url) print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
parse_web_index(k,url) parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -5,14 +5,9 @@ __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re import re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
# un-comment the following six lines for the Ottawa Citizen # un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg' ## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg' ## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
Kindle_Fire=False Kindle_Fire=False
masthead_url = std_logo_url masthead_url = std_logo_url
url_list = [] url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; } .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }''' #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})] keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, datetime, date from datetime import timedelta, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
def prepare_masthead_image(self, path_to_image, out_path): def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire: if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas from calibre.utils.magick import Image, create_canvas
img = Image() img = Image()
img.open(path_to_image) img.open(path_to_image)
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
div.insert(0,img) div.insert(0,img)
allpics.append(div) allpics.append(div)
pgall.replaceWith(allpics) pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}): for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract() pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
if 'GALLERY' in title.upper(): if 'GALLERY' in title.upper():
return return
if 'PHOTOS' in title.upper(): if 'PHOTOS' in title.upper():
return return
dtag = adiv.find('div','content') dtag = adiv.find('div','content')
description='' description=''
print("URL "+url) print("URL "+url)
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
parse_web_index(k,url) parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -123,7 +123,7 @@ class PluginWidget(QWidget,Ui_Form):
# Look up custom column friendly name # Look up custom column friendly name
rule['field'] = self.eligible_custom_fields[rule['field']]['field'] rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'] in [_('any value'),_('any date')]: if rule['pattern'] in [_('any value'),_('any date')]:
rule_pattern = '.*' rule['pattern'] = '.*'
elif rule['pattern'] == _('unspecified'): elif rule['pattern'] == _('unspecified'):
rule['pattern'] = 'None' rule['pattern'] = 'None'
if 'prefix' in rule: if 'prefix' in rule:

View File

@ -6,7 +6,6 @@ __license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>' __copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import mimetypes
from contextlib import closing from contextlib import closing
from lxml import etree from lxml import etree