mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
...
This commit is contained in:
parent
036cea09d7
commit
29f58de5c0
@ -5,14 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
'''
|
'''
|
||||||
www.canada.com
|
www.canada.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
import string, re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
## title = u'Ottawa Citizen'
|
## title = u'Ottawa Citizen'
|
||||||
## url_prefix = 'http://www.ottawacitizen.com'
|
## url_prefix = 'http://www.ottawacitizen.com'
|
||||||
## description = u'News from Ottawa, ON'
|
## description = u'News from Ottawa, ON'
|
||||||
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
## logo_url = 'oclogo.jpg'
|
## logo_url = 'oclogo.jpg'
|
||||||
## fp_tag = 'CAN_OC'
|
## fp_tag = 'CAN_OC'
|
||||||
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
Kindle_Fire=False
|
Kindle_Fire=False
|
||||||
masthead_url = std_logo_url
|
masthead_url = std_logo_url
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||||
|
|
||||||
remove_tags = [{'class':'comments'},
|
remove_tags = [{'class':'comments'},
|
||||||
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
from datetime import timedelta, datetime, date
|
from datetime import timedelta, date
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
daysback=1
|
daysback=1
|
||||||
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
if self.Kindle_Fire:
|
if self.Kindle_Fire:
|
||||||
from calibre import fit_image
|
|
||||||
from calibre.utils.magick import Image, create_canvas
|
from calibre.utils.magick import Image, create_canvas
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(path_to_image)
|
img.open(path_to_image)
|
||||||
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
div.insert(0,img)
|
div.insert(0,img)
|
||||||
allpics.append(div)
|
allpics.append(div)
|
||||||
pgall.replaceWith(allpics)
|
pgall.replaceWith(allpics)
|
||||||
|
|
||||||
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
||||||
pg.extract()
|
pg.extract()
|
||||||
return self.strip_anchors(soup)
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
|
||||||
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if 'GALLERY' in title.upper():
|
if 'GALLERY' in title.upper():
|
||||||
return
|
return
|
||||||
if 'PHOTOS' in title.upper():
|
if 'PHOTOS' in title.upper():
|
||||||
return
|
return
|
||||||
dtag = adiv.find('div','content')
|
dtag = adiv.find('div','content')
|
||||||
description=''
|
description=''
|
||||||
print("URL "+url)
|
print("URL "+url)
|
||||||
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
parse_web_index(k,url)
|
parse_web_index(k,url)
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -5,14 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
'''
|
'''
|
||||||
www.canada.com
|
www.canada.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
import string, re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -23,7 +18,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
(u'Vancouver',u'/news/vancouver/index.html'),
|
(u'Vancouver',u'/news/vancouver/index.html'),
|
||||||
(u'Calgary',u'/news/calgary/index.html'),
|
(u'Calgary',u'/news/calgary/index.html'),
|
||||||
(u'Edmonton',u'/news/edmonton/index.html'),
|
(u'Edmonton',u'/news/edmonton/index.html'),
|
||||||
(u'Montreal',u'/news/montreal/index.html'),,
|
(u'Montreal',u'/news/montreal/index.html'),
|
||||||
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
|
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
|
||||||
(u'British Columbia',u'/news/bc/index.html'),
|
(u'British Columbia',u'/news/bc/index.html'),
|
||||||
(u'Alberta',u'/news/alberta/index.html'),
|
(u'Alberta',u'/news/alberta/index.html'),
|
||||||
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
## title = u'Ottawa Citizen'
|
## title = u'Ottawa Citizen'
|
||||||
## url_prefix = 'http://www.ottawacitizen.com'
|
## url_prefix = 'http://www.ottawacitizen.com'
|
||||||
## description = u'News from Ottawa, ON'
|
## description = u'News from Ottawa, ON'
|
||||||
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
## logo_url = 'oclogo.jpg'
|
## logo_url = 'oclogo.jpg'
|
||||||
## fp_tag = 'CAN_OC'
|
## fp_tag = 'CAN_OC'
|
||||||
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
Kindle_Fire=False
|
Kindle_Fire=False
|
||||||
masthead_url = std_logo_url
|
masthead_url = std_logo_url
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||||
|
|
||||||
remove_tags = [{'class':'comments'},
|
remove_tags = [{'class':'comments'},
|
||||||
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
from datetime import timedelta, datetime, date
|
from datetime import timedelta, date
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
daysback=1
|
daysback=1
|
||||||
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
if self.Kindle_Fire:
|
if self.Kindle_Fire:
|
||||||
from calibre import fit_image
|
|
||||||
from calibre.utils.magick import Image, create_canvas
|
from calibre.utils.magick import Image, create_canvas
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(path_to_image)
|
img.open(path_to_image)
|
||||||
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
div.insert(0,img)
|
div.insert(0,img)
|
||||||
allpics.append(div)
|
allpics.append(div)
|
||||||
pgall.replaceWith(allpics)
|
pgall.replaceWith(allpics)
|
||||||
|
|
||||||
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
||||||
pg.extract()
|
pg.extract()
|
||||||
return self.strip_anchors(soup)
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
|
||||||
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if 'GALLERY' in title.upper():
|
if 'GALLERY' in title.upper():
|
||||||
return
|
return
|
||||||
if 'PHOTOS' in title.upper():
|
if 'PHOTOS' in title.upper():
|
||||||
return
|
return
|
||||||
dtag = adiv.find('div','content')
|
dtag = adiv.find('div','content')
|
||||||
description=''
|
description=''
|
||||||
print("URL "+url)
|
print("URL "+url)
|
||||||
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
parse_web_index(k,url)
|
parse_web_index(k,url)
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -5,14 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
'''
|
'''
|
||||||
www.canada.com
|
www.canada.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
import string, re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
## title = u'Ottawa Citizen'
|
## title = u'Ottawa Citizen'
|
||||||
## url_prefix = 'http://www.ottawacitizen.com'
|
## url_prefix = 'http://www.ottawacitizen.com'
|
||||||
## description = u'News from Ottawa, ON'
|
## description = u'News from Ottawa, ON'
|
||||||
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
## logo_url = 'oclogo.jpg'
|
## logo_url = 'oclogo.jpg'
|
||||||
## fp_tag = 'CAN_OC'
|
## fp_tag = 'CAN_OC'
|
||||||
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
Kindle_Fire=False
|
Kindle_Fire=False
|
||||||
masthead_url = std_logo_url
|
masthead_url = std_logo_url
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||||
|
|
||||||
remove_tags = [{'class':'comments'},
|
remove_tags = [{'class':'comments'},
|
||||||
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
from datetime import timedelta, datetime, date
|
from datetime import timedelta, date
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
daysback=1
|
daysback=1
|
||||||
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
if self.Kindle_Fire:
|
if self.Kindle_Fire:
|
||||||
from calibre import fit_image
|
|
||||||
from calibre.utils.magick import Image, create_canvas
|
from calibre.utils.magick import Image, create_canvas
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(path_to_image)
|
img.open(path_to_image)
|
||||||
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
div.insert(0,img)
|
div.insert(0,img)
|
||||||
allpics.append(div)
|
allpics.append(div)
|
||||||
pgall.replaceWith(allpics)
|
pgall.replaceWith(allpics)
|
||||||
|
|
||||||
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
||||||
pg.extract()
|
pg.extract()
|
||||||
return self.strip_anchors(soup)
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
|
||||||
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if 'GALLERY' in title.upper():
|
if 'GALLERY' in title.upper():
|
||||||
return
|
return
|
||||||
if 'PHOTOS' in title.upper():
|
if 'PHOTOS' in title.upper():
|
||||||
return
|
return
|
||||||
dtag = adiv.find('div','content')
|
dtag = adiv.find('div','content')
|
||||||
description=''
|
description=''
|
||||||
print("URL "+url)
|
print("URL "+url)
|
||||||
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
parse_web_index(k,url)
|
parse_web_index(k,url)
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -5,14 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
'''
|
'''
|
||||||
www.canada.com
|
www.canada.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
import string, re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
title = u'Ottawa Citizen'
|
title = u'Ottawa Citizen'
|
||||||
url_prefix = 'http://www.ottawacitizen.com'
|
url_prefix = 'http://www.ottawacitizen.com'
|
||||||
description = u'News from Ottawa, ON'
|
description = u'News from Ottawa, ON'
|
||||||
std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
logo_url = 'oclogo.jpg'
|
logo_url = 'oclogo.jpg'
|
||||||
fp_tag = 'CAN_OC'
|
fp_tag = 'CAN_OC'
|
||||||
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
Kindle_Fire=False
|
Kindle_Fire=False
|
||||||
masthead_url = std_logo_url
|
masthead_url = std_logo_url
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||||
|
|
||||||
remove_tags = [{'class':'comments'},
|
remove_tags = [{'class':'comments'},
|
||||||
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
from datetime import timedelta, datetime, date
|
from datetime import timedelta, date
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
daysback=1
|
daysback=1
|
||||||
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
if self.Kindle_Fire:
|
if self.Kindle_Fire:
|
||||||
from calibre import fit_image
|
|
||||||
from calibre.utils.magick import Image, create_canvas
|
from calibre.utils.magick import Image, create_canvas
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(path_to_image)
|
img.open(path_to_image)
|
||||||
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
div.insert(0,img)
|
div.insert(0,img)
|
||||||
allpics.append(div)
|
allpics.append(div)
|
||||||
pgall.replaceWith(allpics)
|
pgall.replaceWith(allpics)
|
||||||
|
|
||||||
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
||||||
pg.extract()
|
pg.extract()
|
||||||
return self.strip_anchors(soup)
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
|
||||||
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if 'GALLERY' in title.upper():
|
if 'GALLERY' in title.upper():
|
||||||
return
|
return
|
||||||
if 'PHOTOS' in title.upper():
|
if 'PHOTOS' in title.upper():
|
||||||
return
|
return
|
||||||
dtag = adiv.find('div','content')
|
dtag = adiv.find('div','content')
|
||||||
description=''
|
description=''
|
||||||
print("URL "+url)
|
print("URL "+url)
|
||||||
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
parse_web_index(k,url)
|
parse_web_index(k,url)
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -5,14 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
'''
|
'''
|
||||||
www.canada.com
|
www.canada.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
import string, re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
## title = u'Ottawa Citizen'
|
## title = u'Ottawa Citizen'
|
||||||
## url_prefix = 'http://www.ottawacitizen.com'
|
## url_prefix = 'http://www.ottawacitizen.com'
|
||||||
## description = u'News from Ottawa, ON'
|
## description = u'News from Ottawa, ON'
|
||||||
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
## logo_url = 'oclogo.jpg'
|
## logo_url = 'oclogo.jpg'
|
||||||
## fp_tag = 'CAN_OC'
|
## fp_tag = 'CAN_OC'
|
||||||
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
Kindle_Fire=False
|
Kindle_Fire=False
|
||||||
masthead_url = std_logo_url
|
masthead_url = std_logo_url
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||||
|
|
||||||
remove_tags = [{'class':'comments'},
|
remove_tags = [{'class':'comments'},
|
||||||
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
from datetime import timedelta, datetime, date
|
from datetime import timedelta, date
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
daysback=1
|
daysback=1
|
||||||
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
if self.Kindle_Fire:
|
if self.Kindle_Fire:
|
||||||
from calibre import fit_image
|
|
||||||
from calibre.utils.magick import Image, create_canvas
|
from calibre.utils.magick import Image, create_canvas
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(path_to_image)
|
img.open(path_to_image)
|
||||||
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
div.insert(0,img)
|
div.insert(0,img)
|
||||||
allpics.append(div)
|
allpics.append(div)
|
||||||
pgall.replaceWith(allpics)
|
pgall.replaceWith(allpics)
|
||||||
|
|
||||||
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
||||||
pg.extract()
|
pg.extract()
|
||||||
return self.strip_anchors(soup)
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
|
||||||
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if 'GALLERY' in title.upper():
|
if 'GALLERY' in title.upper():
|
||||||
return
|
return
|
||||||
if 'PHOTOS' in title.upper():
|
if 'PHOTOS' in title.upper():
|
||||||
return
|
return
|
||||||
dtag = adiv.find('div','content')
|
dtag = adiv.find('div','content')
|
||||||
description=''
|
description=''
|
||||||
print("URL "+url)
|
print("URL "+url)
|
||||||
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
parse_web_index(k,url)
|
parse_web_index(k,url)
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -5,14 +5,9 @@ __license__ = 'GPL v3'
|
|||||||
'''
|
'''
|
||||||
www.canada.com
|
www.canada.com
|
||||||
'''
|
'''
|
||||||
import string, re
|
import re
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
import string, re
|
|
||||||
from calibre import strftime
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
|
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
class CanWestPaper(BasicNewsRecipe):
|
class CanWestPaper(BasicNewsRecipe):
|
||||||
@ -90,7 +85,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
# un-comment the following six lines for the Ottawa Citizen
|
# un-comment the following six lines for the Ottawa Citizen
|
||||||
## title = u'Ottawa Citizen'
|
## title = u'Ottawa Citizen'
|
||||||
## url_prefix = 'http://www.ottawacitizen.com'
|
## url_prefix = 'http://www.ottawacitizen.com'
|
||||||
## description = u'News from Ottawa, ON'
|
## description = u'News from Ottawa, ON'
|
||||||
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
|
||||||
## logo_url = 'oclogo.jpg'
|
## logo_url = 'oclogo.jpg'
|
||||||
## fp_tag = 'CAN_OC'
|
## fp_tag = 'CAN_OC'
|
||||||
@ -106,7 +101,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
Kindle_Fire=False
|
Kindle_Fire=False
|
||||||
masthead_url = std_logo_url
|
masthead_url = std_logo_url
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
language = 'en_CA'
|
language = 'en_CA'
|
||||||
__author__ = 'Nick Redding'
|
__author__ = 'Nick Redding'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
@ -121,7 +116,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||||
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
#photocredit { font-size: xx-small; font-weight: normal; }'''
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
|
||||||
|
|
||||||
remove_tags = [{'class':'comments'},
|
remove_tags = [{'class':'comments'},
|
||||||
@ -135,7 +130,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
from datetime import timedelta, datetime, date
|
from datetime import timedelta, date
|
||||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
|
||||||
br = BasicNewsRecipe.get_browser()
|
br = BasicNewsRecipe.get_browser()
|
||||||
daysback=1
|
daysback=1
|
||||||
@ -158,7 +153,6 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
|
|
||||||
def prepare_masthead_image(self, path_to_image, out_path):
|
def prepare_masthead_image(self, path_to_image, out_path):
|
||||||
if self.Kindle_Fire:
|
if self.Kindle_Fire:
|
||||||
from calibre import fit_image
|
|
||||||
from calibre.utils.magick import Image, create_canvas
|
from calibre.utils.magick import Image, create_canvas
|
||||||
img = Image()
|
img = Image()
|
||||||
img.open(path_to_image)
|
img.open(path_to_image)
|
||||||
@ -244,12 +238,12 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
div.insert(0,img)
|
div.insert(0,img)
|
||||||
allpics.append(div)
|
allpics.append(div)
|
||||||
pgall.replaceWith(allpics)
|
pgall.replaceWith(allpics)
|
||||||
|
|
||||||
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
|
||||||
pg.extract()
|
pg.extract()
|
||||||
return self.strip_anchors(soup)
|
return self.strip_anchors(soup)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
|
||||||
@ -278,7 +272,7 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
if 'GALLERY' in title.upper():
|
if 'GALLERY' in title.upper():
|
||||||
return
|
return
|
||||||
if 'PHOTOS' in title.upper():
|
if 'PHOTOS' in title.upper():
|
||||||
return
|
return
|
||||||
dtag = adiv.find('div','content')
|
dtag = adiv.find('div','content')
|
||||||
description=''
|
description=''
|
||||||
print("URL "+url)
|
print("URL "+url)
|
||||||
@ -317,4 +311,4 @@ class CanWestPaper(BasicNewsRecipe):
|
|||||||
parse_web_index(k,url)
|
parse_web_index(k,url)
|
||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ class PluginWidget(QWidget,Ui_Form):
|
|||||||
# Look up custom column friendly name
|
# Look up custom column friendly name
|
||||||
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
|
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
|
||||||
if rule['pattern'] in [_('any value'),_('any date')]:
|
if rule['pattern'] in [_('any value'),_('any date')]:
|
||||||
rule_pattern = '.*'
|
rule['pattern'] = '.*'
|
||||||
elif rule['pattern'] == _('unspecified'):
|
elif rule['pattern'] == _('unspecified'):
|
||||||
rule['pattern'] = 'None'
|
rule['pattern'] = 'None'
|
||||||
if 'prefix' in rule:
|
if 'prefix' in rule:
|
||||||
|
@ -6,7 +6,6 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import mimetypes
|
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
Loading…
x
Reference in New Issue
Block a user