added ebooksgratuits.com in the stores of "Get Books"

This commit is contained in:
Florent FAYOLLE 2012-08-16 22:37:21 +02:00
commit e7280044d5
44 changed files with 2823 additions and 926 deletions

View File

@ -16,6 +16,7 @@ class BusinessSpectator(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
auto_cleanup = True
#delay = 1 #delay = 1
use_embedded_content = False use_embedded_content = False
encoding = 'utf8' encoding = 'utf8'
@ -32,11 +33,11 @@ class BusinessSpectator(BasicNewsRecipe):
,'linearize_tables': False ,'linearize_tables': False
} }
keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')] #keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
remove_tags = [dict(attrs={'class':'hql'})] #remove_tags = [dict(attrs={'class':'hql'})]
remove_attributes = ['width','height','style'] #remove_attributes = ['width','height','style']
feeds = [ feeds = [
('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'), ('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
@ -46,3 +47,4 @@ class BusinessSpectator(BasicNewsRecipe):
('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'), ('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'), ('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
] ]

View File

@ -1,35 +1,320 @@
from calibre.web.feeds.news import BasicNewsRecipe #!/usr/bin/env python
# -*- coding: utf-8 -*-
class CalgaryHerald(BasicNewsRecipe): __license__ = 'GPL v3'
title = u'Calgary Herald'
oldest_article = 3 '''
max_articles_per_feed = 100 www.canada.com
'''
feeds = [ import string, re
(u'News', u'http://rss.canada.com/get/?F233'), from calibre import strftime
(u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'), from calibre.web.feeds.news import BasicNewsRecipe
(u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
(u'Politics', u'http://rss.canada.com/get/?F7551'), import string, re
(u'National', u'http://rss.canada.com/get/?F7552'), from calibre import strftime
(u'World', u'http://rss.canada.com/get/?F7553'), from calibre.web.feeds.recipes import BasicNewsRecipe
] from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
__author__ = 'rty'
pubisher = 'Calgary Herald'
description = 'Calgary, Alberta, Canada' class CanWestPaper(BasicNewsRecipe):
category = 'News, Calgary, Alberta, Canada'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
remove_javascript = True (u'Ottawa & Area',u'/news/ottawa/index.html'),
use_embedded_content = False (u'Vancouver',u'/news/vancouver/index.html'),
no_stylesheets = True (u'Calgary',u'/news/calgary/index.html'),
language = 'en_CA' (u'Edmonton',u'/news/edmonton/index.html'),
encoding = 'utf-8' (u'Montreal',u'/news/montreal/index.html'),
conversion_options = {'linearize_tables':True} (u'Fraser Valley',u'/news/fraser-valley/index.html'),
##masthead_url = 'http://www.calgaryherald.com/index.html' (u'British Columbia',u'/news/bc/index.html'),
keep_only_tags = [ (u'Alberta',u'/news/alberta/index.html'),
dict(name='div', attrs={'id':'storyheader'}), (u'Canada',u'/news/canada/index.html'),
dict(name='div', attrs={'id':'storycontent'}) (u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
] (u'Insight',u'/news/insight/index.html'),
remove_tags_after = {'class':"story_tool_hr"} (u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
title = u'Calgary Herald'
url_prefix = 'http://www.calgaryherald.com'
description = u'News from Calgary, AB'
std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
logo_url = 'chlogo.jpg'
fp_tag = 'CAN_CH'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
    """Return the URL of the newest available cover image, or None.

    The cover lives on webmedia.newseum.org under a path built from the day
    of the month and ``self.fp_tag``. Today's image is tried first, then
    each of the previous six days. The first URL that the browser opens
    without raising is returned. If all seven attempts fail, "Cover
    unavailable" is logged and None is returned.
    """
    from datetime import timedelta, date
    base = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'
    today = date.today()
    for daysback in range(7):
        day = (today - timedelta(days=daysback)).day
        cover = base+str(day)+'/lg/'+self.fp_tag+'.jpg'
        br = BasicNewsRecipe.get_browser()
        try:
            br.open(cover)
        except Exception:
            # Best effort: a failed fetch only means "try an older day".
            # Exception (not a bare except) lets KeyboardInterrupt through.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
def prepare_masthead_image(self, path_to_image, out_path):
    """Write the masthead image found at path_to_image to out_path.

    Unless ``self.Kindle_Fire`` is set, the work is handed to
    ``BasicNewsRecipe.prepare_masthead_image``. With the flag set, the
    source image is composed onto a new canvas of the same width and height
    and that canvas is saved.
    """
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return
    from calibre import fit_image
    from calibre.utils.magick import Image, create_canvas
    source = Image()
    source.open(path_to_image)
    width, height = source.size
    canvas = create_canvas(width, height)
    canvas.compose(source)
    canvas.save(out_path)
def fixChars(self,string):
    """Return string with stray punctuation code points made typographic.

    The code points 0x91-0x94, 0x96 and 0x97 are mapped to the curly quotes
    and dashes named in the comments below, and the literal entity
    ``&#x2019;`` is mapped to a right single quote.
    """
    # Replace lsquo (\x91)
    fixed = re.sub("\x91","‘",string)
    # Replace rsquo (\x92)
    fixed = re.sub("\x92","’",fixed)
    # Replace ldquo (\x93)
    fixed = re.sub("\x93","“",fixed)
    # Replace rdquo (\x94)
    fixed = re.sub("\x94","”",fixed)
    # Replace ndash (\x96)
    fixed = re.sub("\x96","–",fixed)
    # Replace mdash (\x97)
    fixed = re.sub("\x97","—",fixed)
    # Replace the numeric rsquo entity
    fixed = re.sub("&#x2019;","’",fixed)
    return fixed
def massageNCXText(self, description):
    """Return description made safe for a Kindle table of contents.

    A falsy description (None or empty) is returned unchanged. Otherwise
    HTML entities are decoded through BeautifulStoneSoup, every '&' is
    escaped as '&amp;', and the result is passed through ``fixChars``.
    """
    # Kindle TOC descriptions won't render certain characters
    if not description:
        return description
    # Imported locally: the file's top-level imports only bring in
    # BeautifulSoup and Tag from this module.
    from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
    massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # Replace '&' with '&amp;'
    massaged = re.sub("&","&amp;", massaged)
    return self.fixChars(massaged)
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and a fallback summary to article.

    When first is true, the first <img> inside <body> (if any) is
    registered through ``add_toc_thumbnail``, with a ``links\\linkN\\``
    prefix removed from its src. When the article's text summary is blank,
    the page's ``og:description`` meta content (if present) is used for both
    ``summary`` and ``text_summary``.
    """
    if first:
        body = soup.find('body')
        # A page with no <body> simply gets no thumbnail instead of raising.
        picdiv = body.find('img') if body is not None else None
        if picdiv is not None:
            self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
    if not article.text_summary.strip():
        desc = soup.find('meta',attrs={'property':'og:description'})
        if desc is not None:
            article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
    """Replace each <a> that wraps no image with its rendered contents.

    Every tag in soup is visited and its descendant anchors are examined;
    anchors containing an <img> are left alone. Returns the same soup.
    """
    for container in soup.findAll(True):
        for anchor in container.findAll('a'):
            if anchor.img is not None:
                # Linked images keep their anchor.
                continue
            anchor.replaceWith(anchor.renderContents().decode('cp1252','replace'))
    return soup
def preprocess_html(self,soup):
    """Tidy an article page before conversion and return the cleaned soup.

    Steps, in order: drop empty ``id`` attributes from divs; if the page has
    a ``storyphoto`` div but no ``storycontent`` div, replace the gallery
    with a plain div of images and captions built from its thumbnails;
    remove any remaining ``storyphoto`` divs; finally hand the soup to
    ``strip_anchors``.
    """
    #delete empty id attributes--they screw up the TOC for unknown reasons
    for empty_div in soup.findAll('div',attrs={'id':''}):
        del empty_div['id']

    gallery = soup.find('div',attrs={'id':'storyphoto'})
    # photo gallery perhaps
    if gallery is not None and soup.find('div',attrs={'id':'storycontent'}) is None:
        pictures = Tag(soup,'div')
        lead = gallery.find('div','storyimage')
        if lead is not None:
            lead.extract()
        thumbs = gallery.find('div',attrs={'id':'relatedthumbs'})
        if thumbs is not None:
            for link in thumbs.findAll('a'):
                picture = Tag(soup,'img')
                # Keep only the part of the thumbnail URL before any '?'.
                picture['src'] = link.img['src'].partition('?')[0]
                caption = Tag(soup,'p')
                caption.insert(0,link.img['alt'])
                caption['class']='photocaption'
                wrapper = Tag(soup,'div')
                # Inserting at 0 twice leaves the image ahead of its caption.
                wrapper.insert(0,caption)
                wrapper.insert(0,picture)
                pictures.append(wrapper)
        gallery.replaceWith(pictures)

    for leftover in soup.findAll('div',attrs={'id':'storyphoto'}):
        leftover.extract()
    return self.strip_anchors(soup)
def parse_index(self):
    """Scrape every page in ``self.postmedia_index_pages`` into sections.

    Returns a list of ``(section_title, articles)`` tuples in the order the
    section titles were first seen. Each article is a dict with the keys
    title, url, date, description, author and content. A title listed more
    than once in ``postmedia_index_pages`` yields a single section holding
    the articles of all its pages. Sections with no articles are omitted,
    and pages that cannot be fetched are skipped.
    """
    articles = {}
    ans = []

    def handle_article(adiv,key):
        # Record the headline link inside adiv under section key.
        h1tag = adiv.h1
        if h1tag is None:
            return
        atag = h1tag.a
        if atag is None:
            return
        href = atag['href']
        if href.startswith('http'):
            # Absolute links are skipped.
            return
        if href.startswith('/'):
            url = self.url_prefix+href
        else:
            url = self.url_prefix+'/'+href
        # url_list remembers every URL seen so far so that a story linked
        # from several index pages is only listed once.
        if url in self.url_list:
            return
        self.url_list.append(url)
        title = self.tag_to_string(atag,False)
        upper_title = title.upper()
        if 'VIDEO' in upper_title or 'GALLERY' in upper_title or 'PHOTOS' in upper_title:
            return
        dtag = adiv.find('div','content')
        description=''
        print("URL "+url)
        print("TITLE "+title)
        if dtag is not None:
            stag = dtag.span
            if stag is not None:
                # A timestamp span is not a description.
                if stag['class'] != 'timestamp':
                    description = self.tag_to_string(stag,False)
            else:
                description = self.tag_to_string(dtag,False)
        print("DESCRIPTION: "+description)
        articles.setdefault(key, []).append(dict(title=title,url=url,date='',description=description,author='',content=''))

    def parse_web_index(key, keyurl):
        # Fetch one index page and collect its articles under section key.
        try:
            soup = self.index_to_soup(self.url_prefix+keyurl)
        except Exception:
            # Best effort: an index page that cannot be loaded is skipped.
            return
        # The same title may be configured for several pages; list it once.
        if key not in ans:
            ans.append(key)
        mainsoup = soup.find('div','bodywrapper')
        if mainsoup is None:
            # Unexpected page layout: nothing to collect.
            return
        footer = mainsoup.find(attrs={'id':'footerfeature'})
        if footer is not None:
            footer.extract()
        print("Section: "+key)
        for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
            handle_article(wdiv,key)
            wdiv.extract()
        for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
            for adiv in wdiv.findAll('div','featurecontent'):
                handle_article(adiv,key)

    for (k,url) in self.postmedia_index_pages:
        parse_web_index(k,url)
    return [(key, articles[key]) for key in ans if key in articles]

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re
import re from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist postmedia_index_pages = [
## title = u'Victoria Times Colonist' (u'Headlines',u'/index.html'),
## url_prefix = 'http://www.timescolonist.com' (u'Ottawa & Area',u'/news/ottawa/index.html'),
## description = u'News from Victoria, BC' (u'Vancouver',u'/news/vancouver/index.html'),
## fp_tag = 'CAN_TC' (u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province' ## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com' ## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC' ## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP' ## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun # un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun' ## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com' ## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC' ## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS' ## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal # un-comment the following six lines for the Calgary Herald
title = u'Edmonton Journal'
url_prefix = 'http://www.edmontonjournal.com'
description = u'News from Edmonton, AB'
fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
## title = u'Calgary Herald' ## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com' ## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB' ## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH' ## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post # un-comment the following six lines for the Edmonton Journal
## title = u'Regina Leader-Post' title = u'Edmonton Journal'
## url_prefix = 'http://www.leaderpost.com' url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Regina, SK' description = u'News from Edmonton, AB'
## fp_tag = '' std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
logo_url = 'ejlogo.jpg'
fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix # un-comment the following six lines for the Ottawa Citizen
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette # un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette' ## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com' ## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC' ## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG' ## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
timefmt = ' [%b %d]' timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = ''' extra_css = '''
.timestamp { font-size:xx-small; display: block; } .timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; } #storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; } #storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; } #storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; } .byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; }''' .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})] #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}), dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}), dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}), dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}), dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}), dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})] dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, date from datetime import timedelta, datetime, date
if self.fp_tag=='':
return None
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None cover = None
return cover return cover
def prepare_masthead_image(self, path_to_image, out_path):
    """Produce the masthead file at out_path from path_to_image.

    The base-class implementation is used unless ``self.Kindle_Fire`` is
    true. In that case the image is opened, laid over a newly created
    canvas with the image's own dimensions, and the canvas is saved.
    """
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return
    from calibre import fit_image
    from calibre.utils.magick import Image, create_canvas
    original = Image()
    original.open(path_to_image)
    width, height = original.size
    backdrop = create_canvas(width, height)
    backdrop.compose(original)
    backdrop.save(out_path)
def fixChars(self,string): def fixChars(self,string):
# Replace lsquo (\x91) # Replace lsquo (\x91)
fixed = re.sub("\x91","",string) fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace')) a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup return soup
def preprocess_html(self, soup):
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {} articles = {}
key = 'News' ans = []
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
def handle_article(adiv,key):
    """Record the headline link found in adiv under section key.

    Nothing is recorded when adiv has no <h1><a>, when the link is absolute
    (starts with 'http'), when the resolved URL is already in
    ``self.url_list``, or when the title mentions VIDEO, GALLERY or PHOTOS.
    Otherwise an article dict is appended to ``articles[key]``.
    """
    h1tag = adiv.h1
    if h1tag is None:
        return
    atag = h1tag.a
    if atag is None:
        return
    href = atag['href']
    if href.startswith('http'):
        return
    if href.startswith('/'):
        url = self.url_prefix+href
    else:
        url = self.url_prefix+'/'+href
    # url_list remembers every URL seen so far, so repeats are dropped.
    if url in self.url_list:
        return
    self.url_list.append(url)
    title = self.tag_to_string(atag,False)
    upper_title = title.upper()
    if 'VIDEO' in upper_title or 'GALLERY' in upper_title or 'PHOTOS' in upper_title:
        return
    dtag = adiv.find('div','content')
    description=''
    print("URL "+url)
    print("TITLE "+title)
    if dtag is not None:
        stag = dtag.span
        if stag is not None:
            # A timestamp span is not a description.
            if stag['class'] != 'timestamp':
                description = self.tag_to_string(stag,False)
        else:
            description = self.tag_to_string(dtag,False)
    print("DESCRIPTION: "+description)
    # setdefault replaces the has_key() test, which Python 3 dicts lack.
    articles.setdefault(key, []).append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
    """Fetch the index page at keyurl and collect its articles under key.

    A page that cannot be loaded, or that has no ``bodywrapper`` div, adds
    no articles. The section key is added to ``ans`` at most once, even if
    several index pages share the same title.
    """
    try:
        soup = self.index_to_soup(self.url_prefix+keyurl)
    except Exception:
        # Best effort: an index page that cannot be loaded is skipped.
        return
    if key not in ans:
        ans.append(key)
    mainsoup = soup.find('div','bodywrapper')
    if mainsoup is None:
        # Unexpected page layout: nothing to collect.
        return
    footer = mainsoup.find(attrs={'id':'footerfeature'})
    if footer is not None:
        footer.extract()
    print("Section: "+key)
    for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
        handle_article(wdiv,key)
        # Slider stories are removed so the widget pass below cannot see them again.
        wdiv.extract()
    for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
        for adiv in wdiv.findAll('div','featurecontent'):
            handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -1,48 +1,320 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Montreal Gazette postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following six lines for the Montreal Gazette
title = u'Montreal Gazette' title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC' description = u'News from Montreal, QC'
std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
logo_url = 'mglogo.jpg'
fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
auto_cleanup = True timefmt = ' [%b %d]'
auto_cleanup_keep = '//*[@id="imageBox"]' encoding = 'utf-8'
timefmt = ' [%b %d]'
extra_css = ''' extra_css = '''
.timestamp { font-size:xx-small; display: block; } .timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; } #storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; } #storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; } #storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; } .byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; }''' .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
feeds = [ def get_cover_url(self):
('News', from datetime import timedelta, datetime, date
'http://rss.canada.com/get/?F297'), cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
('Sports', br = BasicNewsRecipe.get_browser()
'http://rss.canada.com/get/?F299'), daysback=1
('Entertainment', try:
'http://rss.canada.com/get/?F7366'), br.open(cover)
('Business', except:
'http://rss.canada.com/get/?F6939'), while daysback<7:
] cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
    """Write the masthead image found at ``path_to_image`` to ``out_path``.

    When ``self.Kindle_Fire`` is false the work is delegated to
    ``BasicNewsRecipe.prepare_masthead_image``.  Otherwise the image is
    composed onto a canvas of exactly its own width and height and that
    canvas is saved, so the logo keeps its original dimensions.
    """
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return
    # Imported lazily: only the Kindle Fire path needs the imaging helpers.
    from calibre.utils.magick import Image, create_canvas
    img = Image()
    img.open(path_to_image)
    width, height = img.size
    canvas = create_canvas(width, height)
    canvas.compose(img)
    canvas.save(out_path)
def fixChars(self, string):
    """Return ``string`` with stray Windows-1252 punctuation made printable.

    The code points \\x91-\\x97 are what Windows-1252 "smart" punctuation
    turns into when it is decoded as Latin-1.  Each one is replaced by
    the typographic character it stands for (previously the single
    quotes and the en dash were deleted instead of replaced).  The
    literal entity ``&#x2019;`` is likewise turned into a right single
    quote.
    """
    replacements = (
        ("\x91", "‘"),       # lsquo
        ("\x92", "’"),       # rsquo
        ("\x93", "“"),       # ldquo
        ("\x94", "”"),       # rdquo
        ("\x96", "–"),       # ndash
        ("\x97", "—"),       # mdash
        ("&#x2019;", "’"),   # rsquo written as a numeric entity
    )
    fixed = string
    # re.sub is kept (rather than str.replace) so matching behaves as
    # before for both byte and unicode input.
    for pattern, replacement in replacements:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
def massageNCXText(self, description):
    """Return ``description`` cleaned up for use in a Kindle TOC entry.

    Kindle TOC descriptions won't render certain characters, so HTML
    entities are converted, escaped ampersands are collapsed to a bare
    ``&`` and the result is passed through ``self.fixChars``.  A falsy
    ``description`` (``None`` or empty) is returned unchanged.
    """
    if not description:
        return description
    # Imported locally: BeautifulStoneSoup is not among the module-level
    # imports visible for this recipe.
    from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
    massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # The previous substitution replaced '&' with '&', a no-op.
    # NOTE(review): presumably the pattern was an escaped ampersand that
    # got decoded somewhere along the way; both common spellings are
    # handled here -- confirm against the upstream recipe.
    massaged = re.sub("&amp;|&#38;", "&", massaged)
    return self.fixChars(massaged)
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and fill in a missing article summary.

    On the first page of an article (``first`` true) the first ``<img>``
    inside ``<body>`` is registered as the TOC thumbnail, with any
    ``links\\linkN\\`` prefix stripped from its ``src``.  Independently of
    ``first``, when the article's text summary is empty it is replaced by
    the page's ``og:description`` meta content, if present.
    """
    if first:
        body = soup.find('body')
        # A page without <body> simply has no thumbnail.
        picdiv = body.find('img') if body is not None else None
        if picdiv is not None:
            self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\', '', picdiv['src']))
    summary = (article.text_summary or '').strip()
    if summary:
        return
    desc = soup.find('meta', attrs={'property': 'og:description'})
    if desc is None:
        return
    content = desc.get('content')
    if content:
        article.summary = article.text_summary = content
def strip_anchors(self, soup):
    """Replace every ``<a>`` that contains no ``<img>`` with its contents.

    Links wrapping an image are left in place.  ``soup`` is modified in
    place and also returned.
    """
    # One document-wide search: the earlier version searched for anchors
    # under every tag in the tree, revisiting the same subtrees repeatedly.
    for anchor in soup.findAll('a'):
        if anchor.img is None:
            # NOTE(review): the rendered contents are decoded as cp1252;
            # confirm that matches the encoding renderContents() emits.
            anchor.replaceWith(anchor.renderContents().decode('cp1252', 'replace'))
    return soup
def preprocess_html(self, soup):
    """Normalise a downloaded story page before conversion.

    * Empty ``id`` attributes on ``<div>`` elements are removed -- they
      screw up the TOC for unknown reasons.
    * If the page has a ``storyphoto`` div but no ``storycontent`` div
      (a photo gallery, perhaps), the gallery is replaced by a plain div
      holding one ``<img>`` + ``<p class="photocaption">`` pair per
      thumbnail link found in ``relatedthumbs``.
    * Any remaining ``storyphoto`` divs are removed.

    Returns the soup after passing it through ``self.strip_anchors``.
    """
    for div in soup.findAll('div', attrs={'id': ''}):
        del div['id']

    pgall = soup.find('div', attrs={'id': 'storyphoto'})
    if pgall is not None and soup.find('div', attrs={'id': 'storycontent'}) is None:
        allpics = Tag(soup, 'div')
        first_img = pgall.find('div', 'storyimage')
        if first_img is not None:
            first_img.extract()
        tlist = pgall.find('div', attrs={'id': 'relatedthumbs'})
        if tlist is not None:
            for atag in tlist.findAll('a'):
                thumb = atag.img
                if thumb is None:
                    # Text-only link: there is no thumbnail to promote.
                    continue
                src = thumb.get('src')
                if not src:
                    continue
                img = Tag(soup, 'img')
                # Drop the query string from the thumbnail URL.
                img['src'] = src.partition('?')[0]
                pdesc = Tag(soup, 'p')
                pdesc.insert(0, thumb.get('alt') or '')
                pdesc['class'] = 'photocaption'
                div = Tag(soup, 'div')
                # Both inserted at index 0, so the image precedes its caption.
                div.insert(0, pdesc)
                div.insert(0, img)
                allpics.append(div)
        pgall.replaceWith(allpics)

    for pg in soup.findAll('div', attrs={'id': 'storyphoto'}):
        pg.extract()
    return self.strip_anchors(soup)
def parse_index(self):
    """Build the article index from ``self.postmedia_index_pages``.

    Each ``(section_title, relative_url)`` pair is fetched from
    ``self.url_prefix`` and scanned for story links.  Returns a list of
    ``(section_title, articles)`` tuples, in the order the section titles
    are first listed, for every section that yielded at least one
    article.  Each article is a dict with ``title``, ``url``, ``date``,
    ``description``, ``author`` and ``content`` keys.

    Pages that cannot be fetched, or that lack the ``bodywrapper`` div,
    are skipped.  URLs already present in ``self.url_list`` are not
    listed again.
    """
    articles = {}
    sections = []

    def handle_article(adiv, key):
        """Record the story linked from ``adiv``'s <h1><a> under ``key``."""
        h1tag = adiv.h1
        if h1tag is None:
            return
        atag = h1tag.a
        if atag is None:
            return
        href = atag['href']
        if href.startswith('http'):
            # Absolute links are skipped; only site-relative ones are kept.
            return
        if href.startswith('/'):
            url = self.url_prefix + href
        else:
            url = self.url_prefix + '/' + href
        if url in self.url_list:
            return
        self.url_list.append(url)
        title = self.tag_to_string(atag, False)
        upper_title = title.upper()
        if 'VIDEO' in upper_title or 'GALLERY' in upper_title or 'PHOTOS' in upper_title:
            return
        dtag = adiv.find('div', 'content')
        description = ''
        print("URL "+url)
        print("TITLE "+title)
        if dtag is not None:
            stag = dtag.span
            if stag is None:
                description = self.tag_to_string(dtag, False)
            elif stag.get('class') != 'timestamp':
                # A timestamp span is not a description; leave it empty.
                description = self.tag_to_string(stag, False)
        print("DESCRIPTION: "+description)
        if key not in articles:
            articles[key] = []
        articles[key].append(dict(title=title, url=url, date='', description=description, author='', content=''))

    def parse_web_index(key, keyurl):
        """Fetch one index page and collect its stories under ``key``."""
        try:
            soup = self.index_to_soup(self.url_prefix + keyurl)
        except Exception:
            # Best effort: a section page that cannot be fetched is skipped.
            return
        # The same title can be listed for more than one page; record it
        # once so the section is not emitted twice.
        if key not in sections:
            sections.append(key)
        mainsoup = soup.find('div', 'bodywrapper')
        if mainsoup is None:
            return
        footer = mainsoup.find(attrs={'id': 'footerfeature'})
        if footer is not None:
            footer.extract()
        print("Section: "+key)
        for wdiv in mainsoup.findAll('div', attrs={'id': re.compile('^HorizontalFeatureSlider_1_Story')}):
            handle_article(wdiv, key)
            # Removed so the widget scan below does not see it again.
            wdiv.extract()
        for wdiv in mainsoup.findAll(attrs={'id': ['featurewidget', 'textfeature', 'textlinks_timestamp']}):
            for adiv in wdiv.findAll('div', 'featurecontent'):
                handle_article(adiv, key)

    for (k, url) in self.postmedia_index_pages:
        parse_web_index(k, url)
    return [(key, articles[key]) for key in sections if key in articles]

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re
import re from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist postmedia_index_pages = [
## title = u'Victoria Times Colonist' (u'Headlines',u'/index.html'),
## url_prefix = 'http://www.timescolonist.com' (u'Ottawa & Area',u'/news/ottawa/index.html'),
## description = u'News from Victoria, BC' (u'Vancouver',u'/news/vancouver/index.html'),
## fp_tag = 'CAN_TC' (u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province' ## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com' ## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC' ## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP' ## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun # un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun' ## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com' ## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC' ## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS' ## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal # un-comment the following six lines for the Calgary Herald
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
## title = u'Calgary Herald' ## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com' ## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB' ## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH' ## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post # un-comment the following six lines for the Edmonton Journal
## title = u'Regina Leader-Post' ## title = u'Edmonton Journal'
## url_prefix = 'http://www.leaderpost.com' ## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Regina, SK' ## description = u'News from Edmonton, AB'
## fp_tag = '' ## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix # un-comment the following six lines for the Ottawa Citizen
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
title = u'Ottawa Citizen' title = u'Ottawa Citizen'
url_prefix = 'http://www.ottawacitizen.com' url_prefix = 'http://www.ottawacitizen.com'
description = u'News from Ottawa, ON' description = u'News from Ottawa, ON'
std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
logo_url = 'oclogo.jpg'
fp_tag = 'CAN_OC' fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette # un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette' ## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com' ## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC' ## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG' ## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
timefmt = ' [%b %d]' timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = ''' extra_css = '''
.timestamp { font-size:xx-small; display: block; } .timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; } #storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; } #storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; } #storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; } .byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; }''' .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})] #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}), dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}), dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}), dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}), dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}), dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})] dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, date from datetime import timedelta, datetime, date
if self.fp_tag=='':
return None
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None cover = None
return cover return cover
def prepare_masthead_image(self, path_to_image, out_path):
    """Write the masthead image found at ``path_to_image`` to ``out_path``.

    When ``self.Kindle_Fire`` is false the work is delegated to
    ``BasicNewsRecipe.prepare_masthead_image``.  Otherwise the image is
    composed onto a canvas of exactly its own width and height and that
    canvas is saved, so the logo keeps its original dimensions.
    """
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return
    # Imported lazily: only the Kindle Fire path needs the imaging helpers.
    from calibre.utils.magick import Image, create_canvas
    img = Image()
    img.open(path_to_image)
    width, height = img.size
    canvas = create_canvas(width, height)
    canvas.compose(img)
    canvas.save(out_path)
def fixChars(self,string): def fixChars(self,string):
# Replace lsquo (\x91) # Replace lsquo (\x91)
fixed = re.sub("\x91","",string) fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace')) a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup return soup
def preprocess_html(self, soup):
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {} articles = {}
key = 'News' ans = []
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -1,136 +1,320 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Vancouver Province postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
title = u'Vancouver Province' title = u'Vancouver Province'
url_prefix = 'http://www.theprovince.com' url_prefix = 'http://www.theprovince.com'
description = u'News from Vancouver, BC' description = u'News from Vancouver, BC'
std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
logo_url = 'vplogo.jpg'
fp_tag = 'CAN_TP'
# un-comment the following three lines for the Vancouver Sun # un-comment the following six lines for the Vancouver Sun
#title = u'Vancouver Sun' ## title = u'Vancouver Sun'
#url_prefix = 'http://www.vancouversun.com' ## url_prefix = 'http://www.vancouversun.com'
#description = u'News from Vancouver, BC' ## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following three lines for the Edmonton Journal # un-comment the following six lines for the Calgary Herald
#title = u'Edmonton Journal' ## title = u'Calgary Herald'
#url_prefix = 'http://www.edmontonjournal.com' ## url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Edmonton, AB' ## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following three lines for the Calgary Herald # un-comment the following six lines for the Edmonton Journal
#title = u'Calgary Herald' ## title = u'Edmonton Journal'
#url_prefix = 'http://www.calgaryherald.com' ## url_prefix = 'http://www.edmontonjournal.com'
#description = u'News from Calgary, AB' ## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following three lines for the Regina Leader-Post # un-comment the following six lines for the Ottawa Citizen
#title = u'Regina Leader-Post' ## title = u'Ottawa Citizen'
#url_prefix = 'http://www.leaderpost.com' ## url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Regina, SK' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following three lines for the Saskatoon Star-Phoenix # un-comment the following six lines for the Montreal Gazette
#title = u'Saskatoon Star-Phoenix' ## title = u'Montreal Gazette'
#url_prefix = 'http://www.thestarphoenix.com' ## url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Saskatoon, SK' ## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
# un-comment the following three lines for the Windsor Star ## logo_url = 'mglogo.jpg'
#title = u'Windsor Star' ## fp_tag = 'CAN_MG'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
timefmt = ' [%b %d]' timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = ''' extra_css = '''
.timestamp { font-size:xx-small; display: block; } .timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; } #storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; } #storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; } #storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; } .byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; }''' .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})] #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}), dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}), dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}), dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}), dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}), dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})] dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
    """Return the URL of the newest front-page image that can be opened.

    The candidate URL is built from the day of the month and
    ``self.fp_tag``.  Today's image is tried first, then each of the
    previous six days.  Returns ``None``, after logging
    "Cover unavailable", when none of the seven candidates opens.
    """
    from datetime import date, timedelta
    br = BasicNewsRecipe.get_browser()
    today = date.today()
    for daysback in range(7):
        # Subtracting a timedelta keeps the day number valid across
        # month boundaries.
        day = (today - timedelta(days=daysback)).day
        cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(day)+'/lg/'+self.fp_tag+'.jpg'
        try:
            br.open(cover)
        except Exception:
            # That day's image could not be opened; try the day before.
            continue
        return cover
    self.log("\nCover unavailable")
    return None
def prepare_masthead_image(self, path_to_image, out_path):
    """Write the masthead image found at ``path_to_image`` to ``out_path``.

    When ``self.Kindle_Fire`` is false the work is delegated to
    ``BasicNewsRecipe.prepare_masthead_image``.  Otherwise the image is
    composed onto a canvas of exactly its own width and height and that
    canvas is saved, so the logo keeps its original dimensions.
    """
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return
    # Imported lazily: only the Kindle Fire path needs the imaging helpers.
    from calibre.utils.magick import Image, create_canvas
    img = Image()
    img.open(path_to_image)
    width, height = img.size
    canvas = create_canvas(width, height)
    canvas.compose(img)
    canvas.save(out_path)
def fixChars(self, string):
    """Return ``string`` with stray Windows-1252 punctuation made printable.

    The code points \\x91-\\x97 are what Windows-1252 "smart" punctuation
    turns into when it is decoded as Latin-1.  Each one is replaced by
    the typographic character it stands for (previously the single
    quotes and the en dash were deleted instead of replaced).  The
    literal entity ``&#x2019;`` is likewise turned into a right single
    quote.
    """
    replacements = (
        ("\x91", "‘"),       # lsquo
        ("\x92", "’"),       # rsquo
        ("\x93", "“"),       # ldquo
        ("\x94", "”"),       # rdquo
        ("\x96", "–"),       # ndash
        ("\x97", "—"),       # mdash
        ("&#x2019;", "’"),   # rsquo written as a numeric entity
    )
    fixed = string
    # re.sub is kept (rather than str.replace) so matching behaves as
    # before for both byte and unicode input.
    for pattern, replacement in replacements:
        fixed = re.sub(pattern, replacement, fixed)
    return fixed
def massageNCXText(self, description):
    """Return ``description`` cleaned up for use in a Kindle TOC entry.

    Kindle TOC descriptions won't render certain characters, so HTML
    entities are converted, escaped ampersands are collapsed to a bare
    ``&`` and the result is passed through ``self.fixChars``.  A falsy
    ``description`` (``None`` or empty) is returned unchanged.
    """
    if not description:
        return description
    # Imported locally: BeautifulStoneSoup is not among the module-level
    # imports visible for this recipe.
    from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
    massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    # The previous substitution replaced '&' with '&', a no-op.
    # NOTE(review): presumably the pattern was an escaped ampersand that
    # got decoded somewhere along the way; both common spellings are
    # handled here -- confirm against the upstream recipe.
    massaged = re.sub("&amp;|&#38;", "&", massaged)
    return self.fixChars(massaged)
def populate_article_metadata(self, article, soup, first):
    """Attach a TOC thumbnail and fill in a missing article summary.

    On the first page of an article (``first`` true) the first ``<img>``
    inside ``<body>`` is registered as the TOC thumbnail, with any
    ``links\\linkN\\`` prefix stripped from its ``src``.  Independently of
    ``first``, when the article's text summary is empty it is replaced by
    the page's ``og:description`` meta content, if present.
    """
    if first:
        body = soup.find('body')
        # A page without <body> simply has no thumbnail.
        picdiv = body.find('img') if body is not None else None
        if picdiv is not None:
            self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\', '', picdiv['src']))
    summary = (article.text_summary or '').strip()
    if summary:
        return
    desc = soup.find('meta', attrs={'property': 'og:description'})
    if desc is None:
        return
    content = desc.get('content')
    if content:
        article.summary = article.text_summary = content
def strip_anchors(self, soup):
    """Replace every ``<a>`` that contains no ``<img>`` with its contents.

    Links wrapping an image are left in place.  ``soup`` is modified in
    place and also returned.
    """
    # One document-wide search: the earlier version searched for anchors
    # under every tag in the tree, revisiting the same subtrees repeatedly.
    for anchor in soup.findAll('a'):
        if anchor.img is None:
            # NOTE(review): the rendered contents are decoded as cp1252;
            # confirm that matches the encoding renderContents() emits.
            anchor.replaceWith(anchor.renderContents().decode('cp1252', 'replace'))
    return soup
def preprocess_html(self,soup): def preprocess_html(self,soup):
#delete iempty id attributes--they screw up the TOC for unknow reasons #delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''}) divtags = soup.findAll('div',attrs={'id':''})
if divtags: if divtags:
for div in divtags: for div in divtags:
del(div['id']) del(div['id'])
return soup
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {} articles = {}
key = 'News' ans = []
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
def handle_article(adiv,key):
    '''
    Record the article linked from the <h1> of `adiv` under section `key`.

    adiv -- parsed tag holding one article teaser
    key  -- section name used as the key into the enclosing `articles` dict

    Nothing is recorded when the teaser has no <h1><a>, when the anchor has
    no href, when the href starts with 'http', when the resulting URL is
    already in self.url_list, or when the title contains VIDEO, GALLERY or
    PHOTOS (case-insensitive). Otherwise the URL is remembered in
    self.url_list and an article dict is appended to articles[key].
    '''
    h1tag = adiv.h1
    if h1tag is None:
        return
    atag = h1tag.a
    if atag is None:
        return
    # .get() instead of atag['href']: an anchor without an href would raise
    # KeyError and abort the whole index parse.
    href = atag.get('href')
    if href is None:
        return
    if href.startswith('http'):
        return
    if href.startswith('/'):
        url = self.url_prefix+href
    else:
        url = self.url_prefix+'/'+href
    if url in self.url_list:
        return
    self.url_list.append(url)
    title = self.tag_to_string(atag,False)
    upper_title = title.upper()
    for marker in ('VIDEO', 'GALLERY', 'PHOTOS'):
        if marker in upper_title:
            return
    dtag = adiv.find('div','content')
    description=''
    print("URL "+url)
    print("TITLE "+title)
    if dtag is not None:
        stag = dtag.span
        if stag is None:
            description = self.tag_to_string(dtag,False)
        elif stag.get('class') != 'timestamp':
            # .get() so that a <span> without a class attribute is treated
            # as a description instead of raising KeyError.
            description = self.tag_to_string(stag,False)
        print("DESCRIPTION: "+description)
    articles.setdefault(key, []).append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
    '''
    Collect the articles of one section index page.

    key    -- section name; appended to the enclosing `ans` list once the
              page has been fetched
    keyurl -- path of the section page, appended to self.url_prefix

    A page that cannot be fetched, or that has no div.bodywrapper, yields
    no articles. Every teaser found is passed to handle_article().
    '''
    try:
        soup = self.index_to_soup(self.url_prefix+keyurl)
    except Exception:
        # Best effort: skip a section that cannot be fetched, but do not
        # swallow KeyboardInterrupt/SystemExit as a bare except would.
        self.log("Section %s could not be fetched, skipping" % key)
        return
    ans.append(key)
    mainsoup = soup.find('div','bodywrapper')
    if mainsoup is None:
        # Unexpected page layout: nothing to scan.
        return
    footer = mainsoup.find(attrs={'id':'footerfeature'})
    if footer is not None:
        footer.extract()
    print("Section: "+key)
    for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
        handle_article(wdiv,key)
        # remove the slider entry so the widget scan below does not see it again
        wdiv.extract()
    for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
        for adiv in wdiv.findAll('div','featurecontent'):
            handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
__license__ = 'GPL v3' __license__ = 'GPL v3'
''' '''
www.canada.com www.canada.com
''' '''
import string, re
import re from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe): class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist postmedia_index_pages = [
## title = u'Victoria Times Colonist' (u'Headlines',u'/index.html'),
## url_prefix = 'http://www.timescolonist.com' (u'Ottawa & Area',u'/news/ottawa/index.html'),
## description = u'News from Victoria, BC' (u'Vancouver',u'/news/vancouver/index.html'),
## fp_tag = 'CAN_TC' (u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province' ## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com' ## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC' ## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP' ## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun # un-comment the following six lines for the Vancouver Sun
title = u'Vancouver Sun' title = u'Vancouver Sun'
url_prefix = 'http://www.vancouversun.com' url_prefix = 'http://www.vancouversun.com'
description = u'News from Vancouver, BC' description = u'News from Vancouver, BC'
std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
logo_url = 'vslogo.jpg'
fp_tag = 'CAN_VS' fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal # un-comment the following six lines for the Calgary Herald
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
## title = u'Calgary Herald' ## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com' ## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB' ## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH' ## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post # un-comment the following six lines for the Edmonton Journal
## title = u'Regina Leader-Post' ## title = u'Edmonton Journal'
## url_prefix = 'http://www.leaderpost.com' ## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Regina, SK' ## description = u'News from Edmonton, AB'
## fp_tag = '' ## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix # un-comment the following six lines for the Ottawa Citizen
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
## title = u'Ottawa Citizen' ## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com' ## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON' ## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC' ## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette # un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette' ## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com' ## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC' ## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG' ## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA' language = 'en_CA'
__author__ = 'Nick Redding' __author__ = 'Nick Redding'
no_stylesheets = True no_stylesheets = True
timefmt = ' [%b %d]' timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = ''' extra_css = '''
.timestamp { font-size:xx-small; display: block; } .timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; } #storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; } #storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; } #storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; } .byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic } #photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; }''' .photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})] #photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'}, remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}), dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}), dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}), dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}), dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}), dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})] dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self): def get_cover_url(self):
from datetime import timedelta, date from datetime import timedelta, datetime, date
if self.fp_tag=='':
return None
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg' cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
daysback=1 daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None cover = None
return cover return cover
def prepare_masthead_image(self, path_to_image, out_path):
    '''
    Write the masthead image found at `path_to_image` to `out_path`.

    When self.Kindle_Fire is false the work is delegated to
    BasicNewsRecipe.prepare_masthead_image. Otherwise the image is composed
    onto a new canvas of the same width and height and saved as is.
    '''
    if not self.Kindle_Fire:
        BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
        return
    # Imported here so the imaging module is only loaded on the Kindle Fire path.
    from calibre.utils.magick import Image, create_canvas
    img = Image()
    img.open(path_to_image)
    width, height = img.size
    img2 = create_canvas(width, height)
    img2.compose(img)
    img2.save(out_path)
def fixChars(self,string): def fixChars(self,string):
# Replace lsquo (\x91) # Replace lsquo (\x91)
fixed = re.sub("\x91","",string) fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace')) a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup return soup
def preprocess_html(self, soup):
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup) return self.strip_anchors(soup)
def parse_index(self): def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {} articles = {}
key = 'News' ans = []
ans = ['News']
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
description = ''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
def handle_article(adiv,key):
    '''
    Record the article linked from the <h1> of `adiv` under section `key`.

    adiv -- parsed tag holding one article teaser
    key  -- section name used as the key into the enclosing `articles` dict

    Nothing is recorded when the teaser has no <h1><a>, when the anchor has
    no href, when the href starts with 'http', when the resulting URL is
    already in self.url_list, or when the title contains VIDEO, GALLERY or
    PHOTOS (case-insensitive). Otherwise the URL is remembered in
    self.url_list and an article dict is appended to articles[key].
    '''
    h1tag = adiv.h1
    if h1tag is None:
        return
    atag = h1tag.a
    if atag is None:
        return
    # .get() instead of atag['href']: an anchor without an href would raise
    # KeyError and abort the whole index parse.
    href = atag.get('href')
    if href is None:
        return
    if href.startswith('http'):
        return
    if href.startswith('/'):
        url = self.url_prefix+href
    else:
        url = self.url_prefix+'/'+href
    if url in self.url_list:
        return
    self.url_list.append(url)
    title = self.tag_to_string(atag,False)
    upper_title = title.upper()
    for marker in ('VIDEO', 'GALLERY', 'PHOTOS'):
        if marker in upper_title:
            return
    dtag = adiv.find('div','content')
    description=''
    print("URL "+url)
    print("TITLE "+title)
    if dtag is not None:
        stag = dtag.span
        if stag is None:
            description = self.tag_to_string(dtag,False)
        elif stag.get('class') != 'timestamp':
            # .get() so that a <span> without a class attribute is treated
            # as a description instead of raising KeyError.
            description = self.tag_to_string(stag,False)
        print("DESCRIPTION: "+description)
    articles.setdefault(key, []).append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
    '''
    Collect the articles of one section index page.

    key    -- section name; appended to the enclosing `ans` list once the
              page has been fetched
    keyurl -- path of the section page, appended to self.url_prefix

    A page that cannot be fetched, or that has no div.bodywrapper, yields
    no articles. Every teaser found is passed to handle_article().
    '''
    try:
        soup = self.index_to_soup(self.url_prefix+keyurl)
    except Exception:
        # Best effort: skip a section that cannot be fetched, but do not
        # swallow KeyboardInterrupt/SystemExit as a bare except would.
        self.log("Section %s could not be fetched, skipping" % key)
        return
    ans.append(key)
    mainsoup = soup.find('div','bodywrapper')
    if mainsoup is None:
        # Unexpected page layout: nothing to scan.
        return
    footer = mainsoup.find(attrs={'id':'footerfeature'})
    if footer is not None:
        footer.extract()
    print("Section: "+key)
    for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
        handle_article(wdiv,key)
        # remove the slider entry so the widget scan below does not see it again
        wdiv.extract()
    for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
        for adiv in wdiv.findAll('div','featurecontent'):
            handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans

View File

@ -1,35 +1,229 @@
body { background-color: white; } body { background-color: white; }
/*
** The following rules apply principally to the line items shown in the
** Authors, Titles, Genres, Series, and Recently Added sections. Rules for the
** Descriptions section are grouped together later in the file.
** ------------------------------------------------------------------------
*/
/*
** <div> grouping an author's works together
** Used in Sections:
** Authors
**
** Minimize widows and orphans by logically grouping chunks
** Some reports of problems with Sony (ADE) ereaders
** ADE: page-break-inside:avoid;
** iBooks: display:inline-block;
** width:100%;
*/
div.author_logical_group {
page-break-inside:avoid;
}
/*
** Force page break when starting new initial letter
** Used in Sections:
** Authors
** Titles
*/
div.initial_letter {
page-break-before:always;
}
/*
** Author name
** Used in Sections:
** Authors
** Genres
** Recently Added
*/
p.author_index {
clear:both;
font-size:large;
font-weight:bold;
text-align:left;
margin-top:0.25px;
margin-bottom:-2px;
text-indent: 0em;
}
/*
** Index letter
** Used in Sections:
** Authors
** Titles
*/
p.author_title_letter_index {
clear:both;
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:0px;
margin-bottom:0px;
}
/*
** Index letter
** Used in Sections:
** Series
*/
p.series_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
/*
** Month-Year
** Used in Sections:
** Recently Added
*/
p.date_index {
clear:both;
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.date_read {
clear:both;
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:6em;
text-indent:-6em;
}
/*
** Series name
** Used in Sections:
** Authors
** Series
** Genres
** Recently Added
** Optimized for ePub
*/
p.series {
clear:both;
font-style:italic;
margin-top:0.10em;
margin-bottom:0em;
margin-left:1.5em;
text-align:left;
text-indent:-1.25em;
}
/*
** Series name
** Used in Sections:
** Authors
** Series
** Genres
** Recently Added
** Optimized for mobi
*/
p.series_mobi {
clear:both;
font-style:italic;
margin-top:0em;
margin-bottom:0em;
margin-left:0em;
text-align:left;
text-indent:-30px;
}
/*
** Section title
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
** Descriptions
*/
p.title {
margin-top:0em;
margin-bottom:0em;
text-align:center;
font-style:italic;
font-size:xx-large;
}
/*
** Line item book listing
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
p.line_item {
clear: both;
font-family:monospace;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
/*
** Prefix
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
span.prefix {
float:left;
margin-left: 0.25em;
text-align: left;
vertical-align: middle;
width: 1.5em;
}
/*
** Book details entry
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
span.entry {
font-family: serif;
vertical-align:middle;
}
/*
** The following rules apply to Descriptions
** -----------------------------------------
*/
/*
** Link to Series
*/
a.series_id { a.series_id {
font-style:normal; font-style:normal;
font-size:large; font-size:large;
} }
/* /*
* Minimize widows and orphans by logically grouping chunks ** Various dividers
* Some reports of problems with Sony (ADE) ereaders
* ADE: page-break-inside:avoid;
* iBooks: display:inline-block;
* width:100%;
*/ */
div.author_logical_group {
page-break-inside:avoid;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
div.initial_letter {
page-break-before:always;
}
hr.annotations_divider { hr.annotations_divider {
width:50%; width:50%;
margin-left:1em; margin-left:1em;
@ -63,47 +257,21 @@ hr.merged_comments_divider {
border-left: solid white 0px; border-left: solid white 0px;
} }
p.date_read { /*
text-align:left; ** Author name
margin-top:0px; */
margin-bottom:0px;
margin-left:6em;
text-indent:-6em;
}
p.author { p.author {
clear:both;
font-size:large; font-size:large;
margin-top:0em; margin-top:0em;
margin-bottom:0em; margin-bottom:0.1em;
text-align: center; text-align: center;
text-indent: 0em; text-indent: 0em;
} }
p.author_index { /*
font-size:large; ** Formats
font-weight:bold; */
text-align:left;
margin-top:0.25px;
margin-bottom:-2px;
text-indent: 0em;
}
p.author_title_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:0px;
margin-bottom:0px;
}
p.date_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.formats { p.formats {
font-size:90%; font-size:90%;
margin-top:0em; margin-top:0em;
@ -112,6 +280,9 @@ p.formats {
text-indent: 0.0in; text-indent: 0.0in;
} }
/*
** Genres
*/
p.genres { p.genres {
font-style:normal; font-style:normal;
margin-top:0.5em; margin-top:0.5em;
@ -120,68 +291,55 @@ p.genres {
text-indent: 0.0in; text-indent: 0.0in;
} }
p.series {
font-style:italic;
margin-top:0.25em;
margin-bottom:0em;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
/*
** Series name
*/
p.series_id { p.series_id {
margin-top:0em; margin-top:0em;
margin-bottom:0em; margin-bottom:0em;
text-align:center; text-align:center;
} }
p.series_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.title {
margin-top:0em;
margin-bottom:0em;
text-align:center;
font-style:italic;
font-size:xx-large;
}
p.wishlist_item, p.unread_book, p.read_book, p.line_item {
font-family:monospace;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
span.prefix {}
span.entry {
font-family: serif;
}
/* /*
* Book Descriptions ** Publisher, Publication Date
*/ */
td.publisher, td.date { td.publisher, td.date {
font-weight:bold; font-weight:bold;
text-align:center; text-align:center;
} }
/*
** Rating
*/
td.rating{ td.rating{
text-align:center; text-align:center;
} }
/*
** Additional notes
*/
td.notes { td.notes {
font-size: 100%; font-size: 100%;
text-align:center; text-align:center;
} }
/*
** Thumbnail
*/
td.thumbnail img { td.thumbnail img {
-webkit-box-shadow: 4px 4px 12px #999; -webkit-box-shadow: 4px 4px 12px #999;
} }
/*
** Comments
*/
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}

View File

@ -172,6 +172,7 @@ if iswindows:
[ [
'calibre/devices/mtp/windows/utils.cpp', 'calibre/devices/mtp/windows/utils.cpp',
'calibre/devices/mtp/windows/device_enumeration.cpp', 'calibre/devices/mtp/windows/device_enumeration.cpp',
'calibre/devices/mtp/windows/device.cpp',
'calibre/devices/mtp/windows/wpd.cpp', 'calibre/devices/mtp/windows/wpd.cpp',
], ],
headers=[ headers=[
@ -298,7 +299,8 @@ class Build(Command):
self.obj_dir = os.path.join(os.path.dirname(SRC), 'build', 'objects') self.obj_dir = os.path.join(os.path.dirname(SRC), 'build', 'objects')
if not os.path.exists(self.obj_dir): if not os.path.exists(self.obj_dir):
os.makedirs(self.obj_dir) os.makedirs(self.obj_dir)
self.build_style(self.j(self.SRC, 'calibre', 'plugins')) if not opts.only:
self.build_style(self.j(self.SRC, 'calibre', 'plugins'))
for ext in extensions: for ext in extensions:
if opts.only != 'all' and opts.only != ext.name: if opts.only != 'all' and opts.only != ext.name:
continue continue

View File

@ -38,7 +38,7 @@ binary_includes = [
'/lib/libz.so.1', '/lib/libz.so.1',
'/usr/lib/libtiff.so.5', '/usr/lib/libtiff.so.5',
'/lib/libbz2.so.1', '/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.25', '/usr/lib/libpoppler.so.27',
'/usr/lib/libxml2.so.2', '/usr/lib/libxml2.so.2',
'/usr/lib/libopenjpeg.so.2', '/usr/lib/libopenjpeg.so.2',
'/usr/lib/libxslt.so.1', '/usr/lib/libxslt.so.1',

View File

@ -379,7 +379,7 @@ class Py2App(object):
@flush @flush
def add_poppler(self): def add_poppler(self):
info('\nAdding poppler') info('\nAdding poppler')
for x in ('libpoppler.26.dylib',): for x in ('libpoppler.27.dylib',):
self.install_dylib(os.path.join(SW, 'lib', x)) self.install_dylib(os.path.join(SW, 'lib', x))
for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'): for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'):
self.install_dylib(os.path.join(SW, 'bin', x), False) self.install_dylib(os.path.join(SW, 'bin', x), False)

View File

@ -28,7 +28,8 @@ isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False) isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat isfreebsd = 'freebsd' in _plat
isnetbsd = 'netbsd' in _plat isnetbsd = 'netbsd' in _plat
isbsd = isfreebsd or isnetbsd isdragonflybsd = 'dragonfly' in _plat
isbsd = isfreebsd or isnetbsd or isdragonflybsd
islinux = not(iswindows or isosx or isbsd) islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen') isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux isunix = isosx or islinux
@ -215,3 +216,13 @@ def get_windows_temp_path():
ans = buf.value ans = buf.value
return ans if ans else None return ans if ans else None
def get_windows_user_locale_name():
    '''
    Return the user's default locale name as reported by the kernel32
    function GetUserDefaultLocaleName, or None when that call returns 0.

    Only the first two '-'-separated components of the reported name are
    kept, joined with '_' (so u'en-US' becomes u'en_US').
    '''
    import ctypes
    capacity = 200
    name_buf = ctypes.create_unicode_buffer(u'\0'*capacity)
    written = ctypes.windll.kernel32.GetUserDefaultLocaleName(name_buf, capacity)
    if written == 0:
        return None
    components = name_buf.value.split(u'-')
    return u'_'.join(components[:2])

View File

@ -1353,7 +1353,7 @@ class StoreEbookscomStore(StoreBase):
class StoreEbooksGratuitsStore(StoreBase): class StoreEbooksGratuitsStore(StoreBase):
name = 'EbooksGratuits.com' name = 'EbooksGratuits.com'
description = u'' description = u'Ebooks Libres et Gratuits'
actual_plugin = 'calibre.gui2.store.stores.ebooksgratuits_plugin:EbooksGratuitsStore' actual_plugin = 'calibre.gui2.store.stores.ebooksgratuits_plugin:EbooksGratuitsStore'
headquarters = 'FR' headquarters = 'FR'

View File

@ -87,7 +87,7 @@ class ANDROID(USBMS):
# Google # Google
0x18d1 : { 0x18d1 : {
0x0001 : [0x0223, 0x9999], 0x0001 : [0x0223, 0x230, 0x9999],
0x0003 : [0x0230], 0x0003 : [0x0230],
0x4e11 : [0x0100, 0x226, 0x227], 0x4e11 : [0x0100, 0x226, 0x227],
0x4e12 : [0x0100, 0x226, 0x227], 0x4e12 : [0x0100, 0x226, 0x227],
@ -196,7 +196,7 @@ class ANDROID(USBMS):
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON', 'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP', 'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD', 'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD',
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI'] 'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE', WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -214,7 +214,8 @@ class ANDROID(USBMS):
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO', '.KOBO_VOX', 'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO', '.KOBO_VOX',
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE', 'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID'] 'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
'S5830I_CARD', 'MID7042']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -224,7 +225,7 @@ class ANDROID(USBMS):
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC', 'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875', 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727', 'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID'] 'USB_FLASH_DRIVER', 'ANDROID', 'MID7042']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,7 @@ class ControlError(ProtocolError):
def __init__(self, query=None, response=None, desc=None): def __init__(self, query=None, response=None, desc=None):
self.query = query self.query = query
self.response = response self.response = response
self.desc = desc
ProtocolError.__init__(self, desc) ProtocolError.__init__(self, desc)
def __str__(self): def __str__(self):

View File

@ -39,6 +39,7 @@ class MTPDeviceBase(DevicePlugin):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
DevicePlugin.__init__(self, *args, **kwargs) DevicePlugin.__init__(self, *args, **kwargs)
self.progress_reporter = None self.progress_reporter = None
self.current_friendly_name = None
def reset(self, key='-1', log_packets=False, report_progress=None, def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None): detected_device=None):
@ -47,3 +48,7 @@ class MTPDeviceBase(DevicePlugin):
def set_progress_reporter(self, report_progress): def set_progress_reporter(self, report_progress):
self.progress_reporter = report_progress self.progress_reporter = report_progress
def get_gui_name(self):
    '''
    Return the name to show for this device: current_friendly_name when it
    is set to a non-empty value, otherwise the plugin's own name.
    '''
    friendly = self.current_friendly_name
    if friendly:
        return friendly
    return self.name

View File

@ -14,7 +14,7 @@ from collections import deque, OrderedDict
from io import BytesIO from io import BytesIO
from calibre import prints from calibre import prints
from calibre.devices.errors import OpenFailed from calibre.devices.errors import OpenFailed, DeviceError
from calibre.devices.mtp.base import MTPDeviceBase, synchronous from calibre.devices.mtp.base import MTPDeviceBase, synchronous
from calibre.devices.mtp.unix.detect import MTPDetect from calibre.devices.mtp.unix.detect import MTPDetect
@ -102,11 +102,6 @@ class MTP_DEVICE(MTPDeviceBase):
if self.progress_reporter is not None: if self.progress_reporter is not None:
self.progress_reporter(p) self.progress_reporter(p)
@synchronous
def get_gui_name(self):
if self.dev is None or not self.dev.friendly_name: return self.name
return self.dev.friendly_name
@synchronous @synchronous
def is_usb_connected(self, devices_on_system, debug=False, def is_usb_connected(self, devices_on_system, debug=False,
only_presence=False): only_presence=False):
@ -134,7 +129,7 @@ class MTP_DEVICE(MTPDeviceBase):
@synchronous @synchronous
def post_yank_cleanup(self): def post_yank_cleanup(self):
self.dev = self.filesystem_cache = None self.dev = self.filesystem_cache = self.current_friendly_name = None
@synchronous @synchronous
def startup(self): def startup(self):
@ -184,15 +179,18 @@ class MTP_DEVICE(MTPDeviceBase):
self._carda_id = storage[1]['id'] self._carda_id = storage[1]['id']
if len(storage) > 2: if len(storage) > 2:
self._cardb_id = storage[2]['id'] self._cardb_id = storage[2]['id']
self.current_friendly_name = self.dev.name
@synchronous
def read_filesystem_cache(self):
try: try:
files, errs = self.dev.get_filelist(self) files, errs = self.dev.get_filelist(self)
if errs and not files: if errs and not files:
raise OpenFailed('Failed to read files from device. Underlying errors:\n' raise DeviceError('Failed to read files from device. Underlying errors:\n'
+self.format_errorstack(errs)) +self.format_errorstack(errs))
folders, errs = self.dev.get_folderlist() folders, errs = self.dev.get_folderlist()
if errs and not folders: if errs and not folders:
raise OpenFailed('Failed to read folders from device. Underlying errors:\n' raise DeviceError('Failed to read folders from device. Underlying errors:\n'
+self.format_errorstack(errs)) +self.format_errorstack(errs))
self.filesystem_cache = FilesystemCache(files, folders) self.filesystem_cache = FilesystemCache(files, folders)
except: except:
@ -202,15 +200,15 @@ class MTP_DEVICE(MTPDeviceBase):
@synchronous @synchronous
def get_device_information(self, end_session=True): def get_device_information(self, end_session=True):
d = self.dev d = self.dev
return (d.friendly_name, d.device_version, d.device_version, '') return (self.current_friendly_name, d.device_version, d.device_version, '')
@synchronous @synchronous
def card_prefix(self, end_session=True): def card_prefix(self, end_session=True):
ans = [None, None] ans = [None, None]
if self._carda_id is not None: if self._carda_id is not None:
ans[0] = 'mtp:%d:'%self._carda_id ans[0] = 'mtp:::%d:::'%self._carda_id
if self._cardb_id is not None: if self._cardb_id is not None:
ans[1] = 'mtp:%d:'%self._cardb_id ans[1] = 'mtp:::%d:::'%self._cardb_id
return tuple(ans) return tuple(ans)
@synchronous @synchronous
@ -248,6 +246,7 @@ if __name__ == '__main__':
devs = linux_scanner() devs = linux_scanner()
mtp_devs = dev.detect(devs) mtp_devs = dev.detect(devs)
dev.open(list(mtp_devs)[0], 'xxx') dev.open(list(mtp_devs)[0], 'xxx')
dev.read_filesystem_cache()
d = dev.dev d = dev.dev
print ("Opened device:", dev.get_gui_name()) print ("Opened device:", dev.get_gui_name())
print ("Storage info:") print ("Storage info:")

View File

@ -1,3 +1,11 @@
/*
* libmtp.c
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE #define UNICODE
#include <Python.h> #include <Python.h>

View File

@ -0,0 +1,137 @@
/*
* device.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
extern IPortableDevice* wpd::open_device(const wchar_t *pnp_id, IPortableDeviceValues *client_information);
extern IPortableDeviceValues* wpd::get_client_information();
extern PyObject* wpd::get_device_information(IPortableDevice *device);
using namespace wpd;
// Device.__init__() {{{
static void
dealloc(Device* self)
{
    // Destructor for wpd.Device: drop the PnP id buffer, the device
    // connection, the client information, and the cached info dict, then
    // hand the object's memory back to its type's allocator.
    if (self->pnp_id) {
        free(self->pnp_id);
    }
    self->pnp_id = NULL;

    if (self->device) {
        // The GIL is released while the COM device is closed and released.
        Py_BEGIN_ALLOW_THREADS;
        self->device->Close();
        self->device->Release();
        self->device = NULL;
        Py_END_ALLOW_THREADS;
    }

    if (self->client_information) {
        self->client_information->Release();
        self->client_information = NULL;
    }

    Py_XDECREF(self->device_information);
    self->device_information = NULL;

    self->ob_type->tp_free((PyObject*)self);
}
static int
init(Device *self, PyObject *args, PyObject *kwds)
{
    // Device.__init__(pnp_id): open the device identified by pnp_id and cache
    // its information dict. Returns 0 on success and -1 as soon as any step
    // fails; partially acquired resources stay on self and are released by
    // dealloc().
    PyObject *py_pnp_id;

    if (!PyArg_ParseTuple(args, "O", &py_pnp_id)) return -1;

    self->pnp_id = unicode_to_wchar(py_pnp_id);
    if (self->pnp_id == NULL) return -1;

    self->client_information = get_client_information();
    if (self->client_information == NULL) return -1;

    self->device = open_device(self->pnp_id, self->client_information);
    if (self->device == NULL) return -1;

    self->device_information = get_device_information(self->device);
    return (self->device_information != NULL) ? 0 : -1;
}
// }}}
// update_device_data() {{{
static PyObject*
update_data(Device *self, PyObject *args, PyObject *kwargs) {
    // Device.update_data(): re-query the device and replace the cached
    // information dict. Returns None, or NULL with an exception set.
    PyObject *di = NULL, *old = NULL;

    // self->device is still NULL if __init__ never ran or failed part-way
    // (the type is subclassable, so Python code can reach this state).
    // Raise instead of dereferencing a NULL COM pointer.
    if (self->device == NULL) {
        PyErr_SetString(PyExc_ValueError, "The device has not been opened");
        return NULL;
    }

    di = get_device_information(self->device);
    if (di == NULL) return NULL;

    // Install the new dict before dropping the old reference so that
    // self->device_information never points at a freed object.
    old = self->device_information;
    self->device_information = di;
    Py_XDECREF(old);
    Py_RETURN_NONE;
} // }}}
// Method table for the wpd.Device type (installed via tp_methods below).
// The trailing {NULL} entry is the sentinel that terminates the table.
static PyMethodDef Device_methods[] = {
{"update_data", (PyCFunction)update_data, METH_VARARGS,
"update_data() -> Reread the basic device data from the device (total, space, free space, storage locations, etc.)"
},
{NULL}
};
// Device.data {{{
static PyObject *
Device_data(Device *self, void *closure) {
    // Getter for Device.data: returns a new reference to the cached device
    // information dict. If the dict was never populated (__init__ did not run
    // or failed), return None rather than calling Py_INCREF on NULL.
    if (self->device_information == NULL) Py_RETURN_NONE;
    Py_INCREF(self->device_information); return self->device_information;
} // }}}
// Attribute table for the wpd.Device type (installed via tp_getset below).
// "data" has a getter (Device_data) and no setter, so it is read-only.
static PyGetSetDef Device_getsetters[] = {
{(char *)"data",
(getter)Device_data, NULL,
(char *)"The basic device information.",
NULL},
{NULL} /* Sentinel */
};
// Type object for wpd.Device. The initializers are positional, so their order
// must match the PyTypeObject slot layout named in the trailing comments.
// tp_new is left as 0 here; only dealloc, the method/getset tables and init
// are filled in by this initializer.
PyTypeObject wpd::DeviceType = { // {{{
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"wpd.Device", /*tp_name*/
sizeof(Device), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Device", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Device_methods, /* tp_methods */
0, /* tp_members */
Device_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
}; // }}}

View File

@ -2,7 +2,7 @@
* device_enumeration.cpp * device_enumeration.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net> * Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
* *
* Distributed under terms of the MIT license. * Distributed under terms of the GPL3 license.
*/ */
#include "global.h" #include "global.h"
@ -72,15 +72,131 @@ IPortableDevice *open_device(const wchar_t *pnp_id, IPortableDeviceValues *clien
} // }}} } // }}}
PyObject* get_storage_info(IPortableDevice *device) { // {{{
HRESULT hr, hr2;
IPortableDeviceContent *content = NULL;
IEnumPortableDeviceObjectIDs *objects = NULL;
IPortableDeviceProperties *properties = NULL;
IPortableDeviceKeyCollection *storage_properties = NULL;
IPortableDeviceValues *values = NULL;
PyObject *ans = NULL, *storage = NULL, *so = NULL, *desc = NULL, *soid = NULL;
DWORD fetched, i;
PWSTR object_ids[10];
GUID guid;
ULONGLONG capacity, free_space, capacity_objects, free_objects;
ULONG access;
LPWSTR storage_desc = NULL;
storage = PyList_New(0);
if (storage == NULL) { PyErr_NoMemory(); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = device->Content(&content);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get content interface from device", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = content->Properties(&properties);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get properties interface", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = CoCreateInstance(CLSID_PortableDeviceKeyCollection, NULL,
CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&storage_properties));
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to create storage properties collection", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = storage_properties->Add(WPD_OBJECT_CONTENT_TYPE);
hr = storage_properties->Add(WPD_FUNCTIONAL_OBJECT_CATEGORY);
hr = storage_properties->Add(WPD_STORAGE_DESCRIPTION);
hr = storage_properties->Add(WPD_STORAGE_CAPACITY);
hr = storage_properties->Add(WPD_STORAGE_CAPACITY_IN_OBJECTS);
hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_BYTES);
hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_OBJECTS);
hr = storage_properties->Add(WPD_STORAGE_ACCESS_CAPABILITY);
hr = storage_properties->Add(WPD_STORAGE_FILE_SYSTEM_TYPE);
hr = storage_properties->Add(WPD_OBJECT_NAME);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to create collection of properties for storage query", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = content->EnumObjects(0, WPD_DEVICE_OBJECT_ID, NULL, &objects);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get objects from device", hr); goto end;}
hr = S_OK;
while (hr == S_OK) {
Py_BEGIN_ALLOW_THREADS;
hr = objects->Next(10, object_ids, &fetched);
Py_END_ALLOW_THREADS;
if (SUCCEEDED(hr)) {
for(i = 0; i < fetched; i++) {
Py_BEGIN_ALLOW_THREADS;
hr2 = properties->GetValues(object_ids[i], storage_properties, &values);
Py_END_ALLOW_THREADS;
if SUCCEEDED(hr2) {
if (
SUCCEEDED(values->GetGuidValue(WPD_OBJECT_CONTENT_TYPE, &guid)) && IsEqualGUID(guid, WPD_CONTENT_TYPE_FUNCTIONAL_OBJECT) &&
SUCCEEDED(values->GetGuidValue(WPD_FUNCTIONAL_OBJECT_CATEGORY, &guid)) && IsEqualGUID(guid, WPD_FUNCTIONAL_CATEGORY_STORAGE)
) {
capacity = 0; capacity_objects = 0; free_space = 0; free_objects = 0;
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY, &capacity);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY_IN_OBJECTS, &capacity_objects);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_BYTES, &free_space);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_OBJECTS, &free_objects);
desc = Py_False;
if (SUCCEEDED(values->GetUnsignedIntegerValue(WPD_STORAGE_ACCESS_CAPABILITY, &access)) && access == WPD_STORAGE_ACCESS_CAPABILITY_READWRITE) desc = Py_True;
soid = PyUnicode_FromWideChar(object_ids[i], wcslen(object_ids[i]));
if (soid == NULL) { PyErr_NoMemory(); goto end; }
so = Py_BuildValue("{s:K,s:K,s:K,s:K,s:O,s:N}",
"capacity", capacity, "capacity_objects", capacity_objects, "free_space", free_space, "free_objects", free_objects, "rw", desc, "id", soid);
if (so == NULL) { PyErr_NoMemory(); goto end; }
if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_DESCRIPTION, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "description", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
if (SUCCEEDED(values->GetStringValue(WPD_OBJECT_NAME, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "name", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_FILE_SYSTEM_TYPE, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "filesystem", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
PyList_Append(storage, so);
Py_DECREF(so);
}
}
}
}
}
ans = storage;
end:
if (content != NULL) content->Release();
if (objects != NULL) objects->Release();
if (properties != NULL) properties->Release();
if (storage_properties != NULL) storage_properties->Release();
if (values != NULL) values->Release();
return ans;
} // }}}
PyObject* get_device_information(IPortableDevice *device) { // {{{ PyObject* get_device_information(IPortableDevice *device) { // {{{
IPortableDeviceContent *content = NULL; IPortableDeviceContent *content = NULL;
IPortableDeviceProperties *properties = NULL; IPortableDeviceProperties *properties = NULL;
IPortableDeviceKeyCollection *keys = NULL; IPortableDeviceKeyCollection *keys = NULL;
IPortableDeviceValues *values = NULL; IPortableDeviceValues *values = NULL;
IPortableDeviceCapabilities *capabilities = NULL;
IPortableDevicePropVariantCollection *categories = NULL;
HRESULT hr; HRESULT hr;
DWORD num_of_categories, i;
LPWSTR temp; LPWSTR temp;
ULONG ti; ULONG ti;
PyObject *t, *ans = NULL; PyObject *t, *ans = NULL, *storage = NULL;
char *type; char *type;
Py_BEGIN_ALLOW_THREADS; Py_BEGIN_ALLOW_THREADS;
@ -117,6 +233,21 @@ PyObject* get_device_information(IPortableDevice *device) { // {{{
Py_END_ALLOW_THREADS; Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device info", hr); goto end; } if(FAILED(hr)) {hresult_set_exc("Failed to get device info", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = device->Capabilities(&capabilities);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device capabilities", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = capabilities->GetFunctionalCategories(&categories);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device functional categories", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = categories->GetCount(&num_of_categories);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device functional categories number", hr); goto end; }
ans = PyDict_New(); ans = PyDict_New();
if (ans == NULL) {PyErr_NoMemory(); goto end;} if (ans == NULL) {PyErr_NoMemory(); goto end;}
@ -184,11 +315,34 @@ PyObject* get_device_information(IPortableDevice *device) { // {{{
CoTaskMemFree(temp); CoTaskMemFree(temp);
} }
t = Py_False;
for (i = 0; i < num_of_categories; i++) {
PROPVARIANT pv;
PropVariantInit(&pv);
if (SUCCEEDED(categories->GetAt(i, &pv)) && pv.puuid != NULL) {
if (IsEqualGUID(WPD_FUNCTIONAL_CATEGORY_STORAGE, *pv.puuid)) {
t = Py_True;
}
}
PropVariantClear(&pv);
if (t == Py_True) break;
}
PyDict_SetItemString(ans, "has_storage", t);
if (t == Py_True) {
storage = get_storage_info(device);
if (storage == NULL) goto end;
PyDict_SetItemString(ans, "storage", storage);
}
end: end:
if (keys != NULL) keys->Release(); if (keys != NULL) keys->Release();
if (values != NULL) values->Release(); if (values != NULL) values->Release();
if (properties != NULL) properties->Release(); if (properties != NULL) properties->Release();
if (content != NULL) content->Release(); if (content != NULL) content->Release();
if (capabilities != NULL) capabilities->Release();
if (categories != NULL) categories->Release();
return ans; return ans;
} // }}} } // }}}

View File

@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from threading import RLock
from calibre import as_unicode, prints
from calibre.constants import plugins, __appname__, numeric_version
from calibre.devices.errors import OpenFailed
from calibre.devices.mtp.base import MTPDeviceBase, synchronous
class MTP_DEVICE(MTPDeviceBase):
    '''
    MTP device driver for Windows, implemented on top of the ``wpd``
    extension module (loaded through ``calibre.constants.plugins``).
    '''

    supported_platforms = ['windows']

    def __init__(self, *args, **kwargs):
        MTPDeviceBase.__init__(self, *args, **kwargs)
        self.dev = None
        self.lock = RLock()
        # PnP ids that failed to open / were ejected by the user. Both sets
        # are pruned in detect_managed_devices() once a device disappears.
        self.blacklisted_devices = set()
        self.ejected_devices = set()
        # PnP id of the device opened by open(), None when not connected
        self.currently_connected_pnp_id = None
        # Maps PnP id -> device info dict (None/False/{} while unavailable)
        self.detected_devices = {}
        self.previous_devices_on_system = frozenset()
        self.last_refresh_devices_time = time.time()
        self.wpd = self.wpd_error = None
        self._main_id = self._carda_id = self._cardb_id = None

    @synchronous
    def startup(self):
        '''
        Load and initialize the wpd plugin. On failure a description of the
        problem is stored in self.wpd_error.
        '''
        self.wpd, self.wpd_error = plugins['wpd']
        if self.wpd is not None:
            try:
                self.wpd.init(__appname__, *(numeric_version[:3]))
            except self.wpd.NoWPD:
                self.wpd_error = _(
                    'The Windows Portable Devices service is not available'
                    ' on your computer. You may need to install Windows'
                    ' Media Player 11 or newer and/or restart your computer')
            except Exception as e:
                self.wpd_error = as_unicode(e)

    @synchronous
    def shutdown(self):
        ''' Drop the open device and uninitialize the wpd plugin. '''
        self.dev = self.filesystem_cache = None
        if self.wpd is not None:
            self.wpd.uninit()

    @synchronous
    def detect_managed_devices(self, devices_on_system):
        '''
        Return the PnP id of the device this driver should manage, or None.

        The list of WPD devices is re-enumerated only when devices_on_system
        changes or more than 10 seconds have passed since the last refresh.
        While a device is connected, its id is returned for as long as it
        remains present; otherwise the first suitable device that is neither
        blacklisted nor ejected is returned.
        '''
        if self.wpd is None:
            return None

        devices_on_system = frozenset(devices_on_system)
        if (devices_on_system != self.previous_devices_on_system or time.time()
                - self.last_refresh_devices_time > 10):
            self.previous_devices_on_system = devices_on_system
            self.last_refresh_devices_time = time.time()
            try:
                pnp_ids = frozenset(self.wpd.enumerate_devices())
            except Exception:
                return None

            # Keep the data already fetched for devices that are still present
            self.detected_devices = {dev:self.detected_devices.get(dev, None)
                    for dev in pnp_ids}

        # Get device data for detected devices. If there is an error, we will
        # try again for that device the next time this method is called.
        for dev in tuple(self.detected_devices.iterkeys()):
            data = self.detected_devices.get(dev, None)
            if data is None or data is False:
                try:
                    data = self.wpd.device_info(dev)
                except Exception as e:
                    prints('Failed to get device info for device:', dev,
                            as_unicode(e))
                    # First failure is recorded as False (retry once more),
                    # a second failure as {} (stop retrying this device).
                    data = {} if data is False else False
                self.detected_devices[dev] = data

        # Remove devices that have been disconnected from ejected
        # devices and blacklisted devices
        self.ejected_devices = set(self.detected_devices).intersection(
                self.ejected_devices)
        self.blacklisted_devices = set(self.detected_devices).intersection(
                self.blacklisted_devices)

        if self.currently_connected_pnp_id is not None:
            return (self.currently_connected_pnp_id if
                    self.currently_connected_pnp_id in self.detected_devices
                    else None)

        for dev, data in self.detected_devices.iteritems():
            if dev in self.blacklisted_devices or dev in self.ejected_devices:
                # Ignore blacklisted and ejected devices
                continue
            if data and self.is_suitable_wpd_device(data):
                return dev

        return None

    def is_suitable_wpd_device(self, devdata):
        '''
        Return True if devdata describes a device that speaks MTP and has at
        least one read-write storage.
        '''
        # Check that protocol is MTP
        protocol = devdata.get('protocol', '').lower()
        if not protocol.startswith('mtp:'):
            return False

        # Check that the device has some read-write storage
        if not devdata.get('has_storage', False):
            return False
        return any(s.get('rw', False) for s in devdata.get('storage', []))

    def _forget_connection(self):
        ''' Reset all state that describes the currently connected device. '''
        self.currently_connected_pnp_id = self.current_friendly_name = None
        self._main_id = self._carda_id = self._cardb_id = None
        self.dev = self.filesystem_cache = None

    @synchronous
    def post_yank_cleanup(self):
        ''' Forget the connected device after it was unplugged. '''
        self._forget_connection()

    @synchronous
    def eject(self):
        '''
        Disconnect from the current device and remember it as ejected so that
        detect_managed_devices() ignores it until it is unplugged.
        '''
        if self.currently_connected_pnp_id is None:
            return
        self.ejected_devices.add(self.currently_connected_pnp_id)
        self._forget_connection()

    @synchronous
    def open(self, connected_device, library_uuid):
        '''
        Open the device with PnP id connected_device.

        :raises OpenFailed: if the device cannot be opened (after one retry)
            or has no read-write storage. The device is blacklisted in both
            cases.
        '''
        self.dev = self.filesystem_cache = None
        # Do not let storage ids from a previously opened device leak into
        # this connection when the new device has fewer storages.
        self._main_id = self._carda_id = self._cardb_id = None
        try:
            self.dev = self.wpd.Device(connected_device)
        except self.wpd.WPDError:
            # Retry once after a short pause before giving up on the device
            time.sleep(2)
            try:
                self.dev = self.wpd.Device(connected_device)
            except self.wpd.WPDError as e:
                self.blacklisted_devices.add(connected_device)
                raise OpenFailed('Failed to open %s with error: %s'%(
                    connected_device, as_unicode(e)))
        devdata = self.dev.data
        storage = [s for s in devdata.get('storage', []) if s.get('rw', False)]
        if not storage:
            self.blacklisted_devices.add(connected_device)
            # Release the handle of a device we are not going to use
            self.dev = None
            raise OpenFailed('No storage found for device %s'%(connected_device,))
        self._main_id = storage[0]['id']
        if len(storage) > 1:
            self._carda_id = storage[1]['id']
        if len(storage) > 2:
            self._cardb_id = storage[2]['id']

        self.current_friendly_name = devdata.get('friendly_name', None)
        # Record which device is connected; eject() and
        # detect_managed_devices() both depend on this being set.
        self.currently_connected_pnp_id = connected_device

    @synchronous
    def get_device_information(self, end_session=True):
        ''' Return (friendly name, device version, device version, ''). '''
        d = self.dev.data
        dv = d.get('device_version', '')
        return (self.current_friendly_name, dv, dv, '')

    @synchronous
    def card_prefix(self, end_session=True):
        '''
        Return a 2-tuple with the prefixes for card A and card B; an entry is
        None when that card is not present.
        '''
        ans = [None, None]
        if self._carda_id is not None:
            ans[0] = 'mtp:::%s:::'%self._carda_id
        if self._cardb_id is not None:
            ans[1] = 'mtp:::%s:::'%self._cardb_id
        return tuple(ans)

    def _per_storage_value(self, key):
        '''
        Return a 3-tuple (main, card A, card B) holding the value of key for
        each known storage, with 0 for storages that are not present.
        '''
        ans = [0, 0, 0]
        dd = self.dev.data
        for s in dd.get('storage', []):
            i = {self._main_id:0, self._carda_id:1,
                    self._cardb_id:2}.get(s.get('id', -1), None)
            if i is not None:
                ans[i] = s[key]
        return tuple(ans)

    @synchronous
    def total_space(self, end_session=True):
        ''' Return the capacity of (main memory, card A, card B). '''
        return self._per_storage_value('capacity')

    @synchronous
    def free_space(self, end_session=True):
        '''
        Return the free space of (main memory, card A, card B), re-reading
        the device data first.
        '''
        self.dev.update_data()
        return self._per_storage_value('free_space')

View File

@ -2,7 +2,7 @@
* global.h * global.h
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net> * Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
* *
* Distributed under terms of the MIT license. * Distributed under terms of the GPL3 license.
*/ */
#pragma once #pragma once
@ -34,6 +34,18 @@ typedef struct {
} ClientInfo; } ClientInfo;
extern ClientInfo client_info; extern ClientInfo client_info;
// Device type
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
wchar_t *pnp_id;
IPortableDeviceValues *client_information;
IPortableDevice *device;
PyObject *device_information;
} Device;
extern PyTypeObject DeviceType;
// Utility functions // Utility functions
PyObject *hresult_set_exc(const char *msg, HRESULT hr); PyObject *hresult_set_exc(const char *msg, HRESULT hr);
wchar_t *unicode_to_wchar(PyObject *o); wchar_t *unicode_to_wchar(PyObject *o);

View File

@ -7,39 +7,70 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import subprocess, sys, os import subprocess, sys, os, pprint, signal, time, glob
pprint
def build():
builder = subprocess.Popen('ssh xp_build ~/build-wpd'.split())
syncer = subprocess.Popen('ssh getafix ~/test-wpd'.split())
if builder.wait() != 0:
raise Exception('Failed to build plugin')
if syncer.wait() != 0:
raise Exception('Failed to rsync to getafix')
subprocess.check_call(
'scp xp_build:build/calibre/src/calibre/plugins/wpd.pyd /tmp'.split())
subprocess.check_call(
'scp /tmp/wpd.pyd getafix:calibre/src/calibre/devices/mtp/windows'.split())
p = subprocess.Popen(
'ssh getafix calibre-debug -e calibre/src/calibre/devices/mtp/windows/remote.py'.split())
p.wait()
print()
def build(mod='wpd'):
master = subprocess.Popen('ssh -MN getafix'.split())
master2 = subprocess.Popen('ssh -MN xp_build'.split())
try:
while not glob.glob(os.path.expanduser('~/.ssh/*kovid@xp_build*')):
time.sleep(0.05)
builder = subprocess.Popen('ssh xp_build ~/build-wpd'.split())
if builder.wait() != 0:
raise Exception('Failed to build plugin')
while not glob.glob(os.path.expanduser('~/.ssh/*kovid@getafix*')):
time.sleep(0.05)
syncer = subprocess.Popen('ssh getafix ~/test-wpd'.split())
if syncer.wait() != 0:
raise Exception('Failed to rsync to getafix')
subprocess.check_call(
('scp xp_build:build/calibre/src/calibre/plugins/%s.pyd /tmp'%mod).split())
subprocess.check_call(
('scp /tmp/%s.pyd getafix:calibre/src/calibre/devices/mtp/windows'%mod).split())
p = subprocess.Popen(
'ssh getafix calibre-debug -e calibre/src/calibre/devices/mtp/windows/remote.py'.split())
p.wait()
print()
finally:
for m in (master2, master):
m.send_signal(signal.SIGHUP)
for m in (master2, master):
m.wait()
def main(): def main():
import pprint fp, d = os.path.abspath(__file__), os.path.dirname
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) if b'CALIBRE_DEVELOP_FROM' not in os.environ:
env = os.environ.copy()
env[b'CALIBRE_DEVELOP_FROM'] = bytes(d(d(d(d(d(fp))))))
subprocess.call(['calibre-debug', '-e', fp], env=env)
return
sys.path.insert(0, os.path.dirname(fp))
if 'wpd' in sys.modules:
del sys.modules['wpd']
import wpd import wpd
from calibre.constants import plugins from calibre.constants import plugins
plugins._plugins['wpd'] = (wpd, '') plugins._plugins['wpd'] = (wpd, '')
sys.path.pop(0) sys.path.pop(0)
wpd.init('calibre', 1, 0, 0)
from calibre.devices.scanner import win_scanner
from calibre.devices.mtp.windows.driver import MTP_DEVICE
dev = MTP_DEVICE(None)
dev.startup()
print (dev.wpd, dev.wpd_error)
try: try:
for pnp_id in wpd.enumerate_devices(): devices = win_scanner()
print (pnp_id) pnp_id = dev.detect_managed_devices(devices)
pprint.pprint(wpd.device_info(pnp_id)) # pprint.pprint(dev.detected_devices)
print ('Trying to connect to:', pnp_id)
dev.open(pnp_id, '')
print ('Connected to:', dev.get_gui_name())
print ('Total space', dev.total_space())
print ('Free space', dev.free_space())
finally: finally:
wpd.uninit() dev.shutdown()
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -2,7 +2,7 @@
* utils.cpp * utils.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net> * Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
* *
* Distributed under terms of the MIT license. * Distributed under terms of the GPL3 license.
*/ */
#include "global.h" #include "global.h"
@ -33,13 +33,12 @@ PyObject *wpd::hresult_set_exc(const char *msg, HRESULT hr) {
wchar_t *wpd::unicode_to_wchar(PyObject *o) { wchar_t *wpd::unicode_to_wchar(PyObject *o) {
wchar_t *buf; wchar_t *buf;
Py_ssize_t len; Py_ssize_t len;
if (!PyUnicode_Check(o)) {PyErr_Format(PyExc_TypeError, "The pnp id must be a unicode object"); return NULL;} if (!PyUnicode_Check(o)) {PyErr_Format(PyExc_TypeError, "The python object must be a unicode object"); return NULL;}
len = PyUnicode_GET_SIZE(o); len = PyUnicode_GET_SIZE(o);
if (len < 1) {PyErr_Format(PyExc_TypeError, "The pnp id must not be empty."); return NULL;}
buf = (wchar_t *)calloc(len+2, sizeof(wchar_t)); buf = (wchar_t *)calloc(len+2, sizeof(wchar_t));
if (buf == NULL) { PyErr_NoMemory(); return NULL; } if (buf == NULL) { PyErr_NoMemory(); return NULL; }
len = PyUnicode_AsWideChar((PyUnicodeObject*)o, buf, len); len = PyUnicode_AsWideChar((PyUnicodeObject*)o, buf, len);
if (len == -1) { free(buf); PyErr_Format(PyExc_TypeError, "Invalid pnp id."); return NULL; } if (len == -1) { free(buf); PyErr_Format(PyExc_TypeError, "Invalid python unicode object."); return NULL; }
return buf; return buf;
} }

View File

@ -2,7 +2,7 @@
* mtp.c * mtp.c
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net> * Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
* *
* Distributed under terms of the MIT license. * Distributed under terms of the GPL3 license.
*/ */
#include "global.h" #include "global.h"
@ -92,14 +92,10 @@ wpd_enumerate_devices(PyObject *self, PyObject *args) {
ENSURE_WPD(NULL); ENSURE_WPD(NULL);
if (!PyArg_ParseTuple(args, "|O", &refresh)) return NULL; Py_BEGIN_ALLOW_THREADS;
hr = portable_device_manager->RefreshDeviceList();
if (refresh != NULL && PyObject_IsTrue(refresh)) { Py_END_ALLOW_THREADS;
Py_BEGIN_ALLOW_THREADS; if (FAILED(hr)) return hresult_set_exc("Failed to refresh the list of portable devices", hr);
hr = portable_device_manager->RefreshDeviceList();
Py_END_ALLOW_THREADS;
if (FAILED(hr)) return hresult_set_exc("Failed to refresh the list of portable devices", hr);
}
hr = portable_device_manager->GetDevices(NULL, &num_of_devices); hr = portable_device_manager->GetDevices(NULL, &num_of_devices);
num_of_devices += 15; // Incase new devices were connected between this call and the next num_of_devices += 15; // Incase new devices were connected between this call and the next
@ -148,6 +144,7 @@ wpd_device_info(PyObject *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "O", &py_pnp_id)) return NULL; if (!PyArg_ParseTuple(args, "O", &py_pnp_id)) return NULL;
pnp_id = unicode_to_wchar(py_pnp_id); pnp_id = unicode_to_wchar(py_pnp_id);
if (wcslen(pnp_id) < 1) { PyErr_SetString(WPDError, "The PNP id must not be empty."); return NULL; }
if (pnp_id == NULL) return NULL; if (pnp_id == NULL) return NULL;
client_information = get_client_information(); client_information = get_client_information();
@ -174,7 +171,7 @@ static PyMethodDef wpd_methods[] = {
}, },
{"enumerate_devices", wpd_enumerate_devices, METH_VARARGS, {"enumerate_devices", wpd_enumerate_devices, METH_VARARGS,
"enumerate_devices(refresh=False)\n\n Get the list of device PnP ids for all connected devices recognized by the WPD service. The result is cached, unless refresh=True. Do not call with refresh=True too often as it is resource intensive." "enumerate_devices()\n\n Get the list of device PnP ids for all connected devices recognized by the WPD service. Do not call too often as it is resource intensive."
}, },
{"device_info", wpd_device_info, METH_VARARGS, {"device_info", wpd_device_info, METH_VARARGS,
@ -189,6 +186,10 @@ PyMODINIT_FUNC
initwpd(void) { initwpd(void) {
PyObject *m; PyObject *m;
wpd::DeviceType.tp_new = PyType_GenericNew;
if (PyType_Ready(&wpd::DeviceType) < 0)
return;
m = Py_InitModule3("wpd", wpd_methods, "Interface to the WPD windows service."); m = Py_InitModule3("wpd", wpd_methods, "Interface to the WPD windows service.");
if (m == NULL) return; if (m == NULL) return;
@ -197,6 +198,10 @@ initwpd(void) {
NoWPD = PyErr_NewException("wpd.NoWPD", NULL, NULL); NoWPD = PyErr_NewException("wpd.NoWPD", NULL, NULL);
if (NoWPD == NULL) return; if (NoWPD == NULL) return;
Py_INCREF(&DeviceType);
PyModule_AddObject(m, "Device", (PyObject *)&DeviceType);
} }

View File

@ -193,7 +193,11 @@ class PRST1(USBMS):
time_offsets = {} time_offsets = {}
for i, row in enumerate(cursor): for i, row in enumerate(cursor):
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000); try:
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
except (OSError, IOError):
# In case the db has incorrect path info
continue
device_date = int(row[1]); device_date = int(row[1]);
offset = device_date - comp_date offset = device_date - comp_date
time_offsets.setdefault(offset, 0) time_offsets.setdefault(offset, 0)

View File

@ -10,7 +10,8 @@ from threading import RLock
from collections import namedtuple from collections import namedtuple
from calibre import prints, as_unicode from calibre import prints, as_unicode
from calibre.constants import iswindows, isosx, plugins, islinux, isfreebsd from calibre.constants import (iswindows, isosx, plugins, islinux, isfreebsd,
isnetbsd)
osx_scanner = win_scanner = linux_scanner = None osx_scanner = win_scanner = linux_scanner = None
@ -253,13 +254,18 @@ freebsd_scanner = None
if isfreebsd: if isfreebsd:
freebsd_scanner = FreeBSDScanner() freebsd_scanner = FreeBSDScanner()
netbsd_scanner = None
''' NetBSD support currently not written yet '''
if isnetbsd:
netbsd_scanner = None
class DeviceScanner(object): class DeviceScanner(object):
def __init__(self, *args): def __init__(self, *args):
if isosx and osx_scanner is None: if isosx and osx_scanner is None:
raise RuntimeError('The Python extension usbobserver must be available on OS X.') raise RuntimeError('The Python extension usbobserver must be available on OS X.')
self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else linux_scanner self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else netbsd_scanner if isnetbsd else linux_scanner
self.devices = [] self.devices = []
def scan(self): def scan(self):

View File

@ -11,11 +11,12 @@ import socket, select, json, inspect, os, traceback, time, sys, random
import hashlib, threading import hashlib, threading
from base64 import b64encode, b64decode from base64 import b64encode, b64decode
from functools import wraps from functools import wraps
from errno import EAGAIN, EINTR
from calibre import prints from calibre import prints
from calibre.constants import numeric_version, DEBUG from calibre.constants import numeric_version, DEBUG
from calibre.devices.errors import (OpenFailed, ControlError, TimeoutError, from calibre.devices.errors import (OpenFailed, ControlError, TimeoutError,
InitialConnectionError) InitialConnectionError, PacketError)
from calibre.devices.interface import DevicePlugin from calibre.devices.interface import DevicePlugin
from calibre.devices.usbms.books import Book, CollectionsBookList from calibre.devices.usbms.books import Book, CollectionsBookList
from calibre.devices.usbms.deviceconfig import DeviceConfig from calibre.devices.usbms.deviceconfig import DeviceConfig
@ -85,6 +86,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
MAX_CLIENT_COMM_TIMEOUT = 60.0 # Wait at most N seconds for an answer MAX_CLIENT_COMM_TIMEOUT = 60.0 # Wait at most N seconds for an answer
MAX_UNSUCCESSFUL_CONNECTS = 5 MAX_UNSUCCESSFUL_CONNECTS = 5
SEND_NOOP_EVERY_NTH_PROBE = 5
DISCONNECT_AFTER_N_SECONDS = 30*60 # 30 minutes
opcodes = { opcodes = {
'NOOP' : 12, 'NOOP' : 12,
'OK' : 0, 'OK' : 0,
@ -120,7 +124,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
_('Use fixed network port') + ':::<p>' + _('Use fixed network port') + ':::<p>' +
_('If checked, use the port number in the "Port" box, otherwise ' _('If checked, use the port number in the "Port" box, otherwise '
'the driver will pick a random port') + '</p>', 'the driver will pick a random port') + '</p>',
_('Port') + ':::<p>' + _('Port number: ') + ':::<p>' +
_('Enter the port number the driver is to use if the "fixed port" box is checked') + '</p>', _('Enter the port number the driver is to use if the "fixed port" box is checked') + '</p>',
_('Print extra debug information') + ':::<p>' + _('Print extra debug information') + ':::<p>' +
_('Check this box if requested when reporting problems') + '</p>', _('Check this box if requested when reporting problems') + '</p>',
@ -131,7 +135,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
_('. Two special collections are available: %(abt)s:%(abtv)s and %(aba)s:%(abav)s. Add ' _('. Two special collections are available: %(abt)s:%(abtv)s and %(aba)s:%(abav)s. Add '
'these values to the list to enable them. The collections will be ' 'these values to the list to enable them. The collections will be '
'given the name provided after the ":" character.')%dict( 'given the name provided after the ":" character.')%dict(
abt='abt', abtv=ALL_BY_TITLE, aba='aba', abav=ALL_BY_AUTHOR) abt='abt', abtv=ALL_BY_TITLE, aba='aba', abav=ALL_BY_AUTHOR),
'',
_('Enable the no-activity timeout') + ':::<p>' +
_('If this box is checked, calibre will automatically disconnect if '
'a connected device does nothing for %d minutes. Unchecking this '
' box disables this timeout, so calibre will never automatically '
'disconnect.')%(DISCONNECT_AFTER_N_SECONDS/60,) + '</p>',
] ]
EXTRA_CUSTOMIZATION_DEFAULT = [ EXTRA_CUSTOMIZATION_DEFAULT = [
False, False,
@ -141,7 +151,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
False, '9090', False, '9090',
False, False,
'', '',
'' '',
'',
True,
] ]
OPT_AUTOSTART = 0 OPT_AUTOSTART = 0
OPT_PASSWORD = 2 OPT_PASSWORD = 2
@ -149,6 +161,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
OPT_PORT_NUMBER = 5 OPT_PORT_NUMBER = 5
OPT_EXTRA_DEBUG = 6 OPT_EXTRA_DEBUG = 6
OPT_COLLECTIONS = 8 OPT_COLLECTIONS = 8
OPT_AUTODISCONNECT = 10
def __init__(self, path): def __init__(self, path):
self.sync_lock = threading.RLock() self.sync_lock = threading.RLock()
@ -165,7 +178,16 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
inspect.stack()[1][3]), end='') inspect.stack()[1][3]), end='')
for a in args: for a in args:
try: try:
prints('', a, end='') if isinstance(a, dict):
printable = {}
for k,v in a.iteritems():
if isinstance(v, (str, unicode)) and len(v) > 50:
printable[k] = 'too long'
else:
printable[k] = v
prints('', printable, end='');
else:
prints('', a, end='')
except: except:
prints('', 'value too long', end='') prints('', 'value too long', end='')
print() print()
@ -339,6 +361,27 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
pos += len(v) pos += len(v)
return data return data
def _send_byte_string(self, s):
    '''
    Push the byte string ``s`` out over ``self.device_socket``.

    ``send()`` may accept only part of the buffer, so the unsent tail is
    offered again until every byte has been taken. A socket error whose
    first argument is EAGAIN or EINTR is logged and the write is retried
    after a short pause; any other socket error propagates to the caller.

    Raises PacketError if ``s`` is not a ``bytes`` instance, and IOError if
    ``send()`` reports that it wrote nothing.
    '''
    if not isinstance(s, bytes):
        self._debug('given a non-byte string!')
        raise PacketError("Internal error: found a string that isn't bytes")
    offset = 0
    wanted = len(s)
    while offset < wanted:
        try:
            # First attempt offers the whole buffer; later ones only the tail
            pending = s if offset == 0 else s[offset:]
            written = self.device_socket.send(pending)
            if written <= 0:
                raise IOError('Bad write on device socket')
            offset += written
        except socket.error as e:
            self._debug('socket error', e, e.errno)
            if e.args[0] not in (EAGAIN, EINTR):
                raise
            time.sleep(0.1) # lets not hammer the OS too hard
def _call_client(self, op, arg, print_debug_info=True): def _call_client(self, op, arg, print_debug_info=True):
if op != 'NOOP': if op != 'NOOP':
self.noop_counter = 0 self.noop_counter = 0
@ -355,9 +398,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if print_debug_info and extra_debug: if print_debug_info and extra_debug:
self._debug('send string', s) self._debug('send string', s)
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT) self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
self.device_socket.sendall(('%d' % len(s))+s) self._send_byte_string((b'%d' % len(s))+s)
self.device_socket.settimeout(None)
v = self._read_string_from_net() v = self._read_string_from_net()
self.device_socket.settimeout(None)
if print_debug_info and extra_debug: if print_debug_info and extra_debug:
self._debug('received string', v) self._debug('received string', v)
if v: if v:
@ -373,13 +416,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
except socket.error: except socket.error:
self._debug('device went away') self._debug('device went away')
self._close_device_socket() self._close_device_socket()
raise ControlError('Device closed the network connection') raise ControlError(desc='Device closed the network connection')
except: except:
self._debug('other exception') self._debug('other exception')
traceback.print_exc() traceback.print_exc()
self._close_device_socket() self._close_device_socket()
raise raise
raise ControlError('Device responded with incorrect information') raise ControlError(desc='Device responded with incorrect information')
# Write a file as a series of base64-encoded strings. # Write a file as a series of base64-encoded strings.
def _put_file(self, infile, lpath, book_metadata, this_book, total_books): def _put_file(self, infile, lpath, book_metadata, this_book, total_books):
@ -475,7 +518,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self.is_connected = False self.is_connected = False
if self.is_connected: if self.is_connected:
self.noop_counter += 1 self.noop_counter += 1
if only_presence and (self.noop_counter % 5) != 1: if only_presence and (
self.noop_counter % self.SEND_NOOP_EVERY_NTH_PROBE) != 1:
try: try:
ans = select.select((self.device_socket,), (), (), 0) ans = select.select((self.device_socket,), (), (), 0)
if len(ans[0]) == 0: if len(ans[0]) == 0:
@ -486,11 +530,16 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
# This will usually toss an exception if the socket is gone. # This will usually toss an exception if the socket is gone.
except: except:
pass pass
try: if (self.settings().extra_customization[self.OPT_AUTODISCONNECT] and
if self._call_client('NOOP', dict())[0] is None: self.noop_counter > self.DISCONNECT_AFTER_N_SECONDS):
self._close_device_socket()
except:
self._close_device_socket() self._close_device_socket()
self._debug('timeout -- disconnected')
else:
try:
if self._call_client('NOOP', dict())[0] is None:
self._close_device_socket()
except:
self._close_device_socket()
return (self.is_connected, self) return (self.is_connected, self)
if getattr(self, 'listen_socket', None) is not None: if getattr(self, 'listen_socket', None) is not None:
ans = select.select((self.listen_socket,), (), (), 0) ans = select.select((self.listen_socket,), (), (), 0)
@ -533,7 +582,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug() self._debug()
if not self.is_connected: if not self.is_connected:
# We have been called to retry the connection. Give up immediately # We have been called to retry the connection. Give up immediately
raise ControlError('Attempt to open a closed device') raise ControlError(desc='Attempt to open a closed device')
self.current_library_uuid = library_uuid self.current_library_uuid = library_uuid
self.current_library_name = current_library_name() self.current_library_name = current_library_name()
try: try:
@ -569,6 +618,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug('Protocol error - bogus book packet length') self._debug('Protocol error - bogus book packet length')
self._close_device_socket() self._close_device_socket()
return False return False
self._debug('CC version #:', result.get('ccVersionNumber', 'unknown'))
self.max_book_packet_len = result.get('maxBookContentPacketLen', self.max_book_packet_len = result.get('maxBookContentPacketLen',
self.BASE_PACKET_LEN) self.BASE_PACKET_LEN)
exts = result.get('acceptedExtensions', None) exts = result.get('acceptedExtensions', None)
@ -689,7 +739,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._set_known_metadata(book) self._set_known_metadata(book)
bl.add_book(book, replace_metadata=True) bl.add_book(book, replace_metadata=True)
else: else:
raise ControlError('book metadata not returned') raise ControlError(desc='book metadata not returned')
return bl return bl
@synchronous('sync_lock') @synchronous('sync_lock')
@ -720,7 +770,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
print_debug_info=False) print_debug_info=False)
if opcode != 'OK': if opcode != 'OK':
self._debug('protocol error', opcode, i) self._debug('protocol error', opcode, i)
raise ControlError('sync_booklists') raise ControlError(desc='sync_booklists')
@synchronous('sync_lock') @synchronous('sync_lock')
def eject(self): def eject(self):
@ -748,7 +798,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
book = Book(self.PREFIX, lpath, other=mdata) book = Book(self.PREFIX, lpath, other=mdata)
length = self._put_file(infile, lpath, book, i, len(files)) length = self._put_file(infile, lpath, book, i, len(files))
if length < 0: if length < 0:
raise ControlError('Sending book %s to device failed' % lpath) raise ControlError(desc='Sending book %s to device failed' % lpath)
paths.append((lpath, length)) paths.append((lpath, length))
# No need to deal with covers. The client will get the thumbnails # No need to deal with covers. The client will get the thumbnails
# in the mi structure # in the mi structure
@ -789,7 +839,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if opcode == 'OK': if opcode == 'OK':
self._debug('removed book with UUID', result['uuid']) self._debug('removed book with UUID', result['uuid'])
else: else:
raise ControlError('Protocol error - delete books') raise ControlError(desc='Protocol error - delete books')
@synchronous('sync_lock') @synchronous('sync_lock')
def remove_books_from_metadata(self, paths, booklists): def remove_books_from_metadata(self, paths, booklists):
@ -825,7 +875,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
else: else:
eof = True eof = True
else: else:
raise ControlError('request for book data failed') raise ControlError(desc='request for book data failed')
@synchronous('sync_lock') @synchronous('sync_lock')
def set_plugboards(self, plugboards, pb_func): def set_plugboards(self, plugboards, pb_func):

View File

@ -88,6 +88,15 @@ class MOBIOutput(OutputFormatPlugin):
'formats. This option tells calibre not to do this. ' 'formats. This option tells calibre not to do this. '
'Useful if your document contains lots of GIF/PNG images that ' 'Useful if your document contains lots of GIF/PNG images that '
'become very large when converted to JPEG.')), 'become very large when converted to JPEG.')),
OptionRecommendation(name='mobi_file_type', choices=['old', 'both',
'new'], recommended_value='old',
help=_('By default calibre generates MOBI files that contain the '
'old MOBI 6 format. This format is compatible with all '
'devices. However, by changing this setting, you can tell '
'calibre to generate MOBI files that contain both MOBI 6 and '
'the new KF8 format, or only the new KF8 format. KF8 has '
'more features than MOBI 6, but only works with newer Kindles.')),
]) ])
def check_for_periodical(self): def check_for_periodical(self):
@ -165,11 +174,10 @@ class MOBIOutput(OutputFormatPlugin):
toc.nodes[0].href = toc.nodes[0].nodes[0].href toc.nodes[0].href = toc.nodes[0].nodes[0].href
def convert(self, oeb, output_path, input_plugin, opts, log): def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.utils.config import tweaks
from calibre.ebooks.mobi.writer2.resources import Resources from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb self.log, self.opts, self.oeb = log, opts, oeb
mobi_type = tweaks.get('test_mobi_output_type', 'old') mobi_type = opts.mobi_file_type
if self.is_periodical: if self.is_periodical:
mobi_type = 'old' # Amazon does not support KF8 periodicals mobi_type = 'old' # Amazon does not support KF8 periodicals
create_kf8 = mobi_type in ('new', 'both') create_kf8 = mobi_type in ('new', 'both')

View File

@ -11,6 +11,7 @@ from collections import defaultdict
from lxml import etree from lxml import etree
import cssutils import cssutils
from cssutils.css import Property
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES, from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath) namespace, barename, XPath)
@ -276,10 +277,16 @@ class CSSFlattener(object):
cssdict['font-family'] = node.attrib['face'] cssdict['font-family'] = node.attrib['face']
del node.attrib['face'] del node.attrib['face']
if 'color' in node.attrib: if 'color' in node.attrib:
cssdict['color'] = node.attrib['color'] try:
cssdict['color'] = Property('color', node.attrib['color']).value
except ValueError:
pass
del node.attrib['color'] del node.attrib['color']
if 'bgcolor' in node.attrib: if 'bgcolor' in node.attrib:
cssdict['background-color'] = node.attrib['bgcolor'] try:
cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
except ValueError:
pass
del node.attrib['bgcolor'] del node.attrib['bgcolor']
if cssdict.get('font-weight', '').lower() == 'medium': if cssdict.get('font-weight', '').lower() == 'medium':
cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium

View File

@ -15,13 +15,15 @@ from calibre.utils.icu import sort_key
from catalog_epub_mobi_ui import Ui_Form from catalog_epub_mobi_ui import Ui_Form
from PyQt4.Qt import (Qt, QAbstractItemView, QCheckBox, QComboBox, from PyQt4.Qt import (Qt, QAbstractItemView, QCheckBox, QComboBox,
QDoubleSpinBox, QIcon, QLineEdit, QRadioButton, QSize, QSizePolicy, QDoubleSpinBox, QIcon, QLineEdit, QObject, QRadioButton, QSize, QSizePolicy,
QTableWidget, QTableWidgetItem, QToolButton, QVBoxLayout, QWidget) QTableWidget, QTableWidgetItem, QToolButton, QVBoxLayout, QWidget,
SIGNAL)
class PluginWidget(QWidget,Ui_Form): class PluginWidget(QWidget,Ui_Form):
TITLE = _('E-book options') TITLE = _('E-book options')
HELP = _('Options specific to')+' AZW3/EPUB/MOBI '+_('output') HELP = _('Options specific to')+' AZW3/EPUB/MOBI '+_('output')
DEBUG = False
# Output synced to the connected device? # Output synced to the connected device?
sync_enabled = True sync_enabled = True
@ -100,6 +102,39 @@ class PluginWidget(QWidget,Ui_Form):
self.OPTION_FIELDS = option_fields self.OPTION_FIELDS = option_fields
def construct_tw_opts_object(self, c_name, opt_value, opts_dict):
    '''
    Build an opts object from the UI settings to pass to the catalog builder
    Handles two types of rules sets, with and without ['prefix'] field
    Store processed opts object to opts_dict

    c_name:    name of the table-widget option; its trailing '_tw' is
               stripped to form the key written to opts_dict
    opt_value: iterable of stored rule dicts (keys read here: 'enabled',
               'name', 'field', 'pattern' and, optionally, 'prefix')
    opts_dict: dict updated in place with a tuple of rule tuples, each
               (name, field, pattern) or (name, field, pattern, prefix)
    '''
    rule_set = []
    for stored_rule in opt_value:
        # Work on a copy so the stored rule itself is not modified
        rule = copy(stored_rule)
        # Skip disabled and incomplete rules
        if not rule['enabled']:
            continue
        if not rule['field'] or not rule['pattern']:
            continue
        if 'prefix' in rule and not rule['prefix']:
            continue
        if rule['field'] != 'Tags':
            # Look up custom column friendly name
            rule['field'] = self.eligible_custom_fields[rule['field']]['field']
            # Replace the UI placeholder choices with the values handed to
            # the builder. The result must be stored back into the rule,
            # otherwise the placeholder text is passed through unchanged.
            if rule['pattern'] in [_('any value'),_('any date')]:
                rule['pattern'] = '.*'
            elif rule['pattern'] == _('unspecified'):
                rule['pattern'] = 'None'
        if 'prefix' in rule:
            pr = (rule['name'],rule['field'],rule['pattern'],rule['prefix'])
        else:
            pr = (rule['name'],rule['field'],rule['pattern'])
        rule_set.append(pr)
    # Strip off the trailing '_tw'
    opts_dict[c_name[:-3]] = tuple(rule_set)
def fetchEligibleCustomFields(self): def fetchEligibleCustomFields(self):
self.all_custom_fields = self.db.custom_field_keys() self.all_custom_fields = self.db.custom_field_keys()
custom_fields = {} custom_fields = {}
@ -194,11 +229,10 @@ class PluginWidget(QWidget,Ui_Form):
def options(self): def options(self):
# Save/return the current options # Save/return the current options
# exclude_genre stores literally # exclude_genre stores literally
# generate_titles, generate_recently_added store as True/False # Section switches store as True/False
# others store as lists # others store as lists
opts_dict = {} opts_dict = {}
# Save values to gprefs
prefix_rules_processed = False prefix_rules_processed = False
exclusion_rules_processed = False exclusion_rules_processed = False
@ -229,56 +263,8 @@ class PluginWidget(QWidget,Ui_Form):
gprefs.set(self.name + '_' + c_name, opt_value) gprefs.set(self.name + '_' + c_name, opt_value)
# Construct opts object for catalog builder # Construct opts object for catalog builder
if c_name == 'prefix_rules_tw': if c_name in ['exclusion_rules_tw','prefix_rules_tw']:
rule_set = [] self.construct_tw_opts_object(c_name, opt_value, opts_dict)
for stored_rule in opt_value:
# Test for empty name/field/pattern/prefix, continue
# If pattern = any or unspecified, convert to regex
rule = copy(stored_rule)
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern'] or not rule['prefix']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column name
#print(self.eligible_custom_fields[rule['field']]['field'])
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'].startswith('any'):
rule['pattern'] = '.*'
elif rule['pattern'] == 'unspecified':
rule['pattern'] = 'None'
pr = (rule['name'],rule['field'],rule['pattern'],rule['prefix'])
rule_set.append(pr)
opt_value = tuple(rule_set)
opts_dict['prefix_rules'] = opt_value
elif c_name == 'exclusion_rules_tw':
rule_set = []
for stored_rule in opt_value:
# Test for empty name/field/pattern/prefix, continue
# If pattern = any or unspecified, convert to regex
rule = copy(stored_rule)
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column name
#print(self.eligible_custom_fields[rule['field']]['field'])
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'].startswith('any'):
rule['pattern'] = '.*'
elif rule['pattern'] == 'unspecified':
rule['pattern'] = 'None'
pr = (rule['name'],rule['field'],rule['pattern'])
rule_set.append(pr)
opt_value = tuple(rule_set)
opts_dict['exclusion_rules'] = opt_value
else: else:
opts_dict[c_name] = opt_value opts_dict[c_name] = opt_value
@ -299,7 +285,7 @@ class PluginWidget(QWidget,Ui_Form):
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']] opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
except: except:
opts_dict['output_profile'] = ['default'] opts_dict['output_profile'] = ['default']
if False: if self.DEBUG:
print "opts_dict" print "opts_dict"
for opt in sorted(opts_dict.keys(), key=sort_key): for opt in sorted(opts_dict.keys(), key=sort_key):
print " %s: %s" % (opt, repr(opts_dict[opt])) print " %s: %s" % (opt, repr(opts_dict[opt]))
@ -343,7 +329,6 @@ class PluginWidget(QWidget,Ui_Form):
self.header_note_source_fields = custom_fields self.header_note_source_fields = custom_fields
self.header_note_source_field.currentIndexChanged.connect(self.header_note_source_field_changed) self.header_note_source_field.currentIndexChanged.connect(self.header_note_source_field_changed)
# Populate the 'Merge with Comments' combo box # Populate the 'Merge with Comments' combo box
custom_fields = {} custom_fields = {}
for custom_field in self.all_custom_fields: for custom_field in self.all_custom_fields:
@ -450,10 +435,11 @@ class ComboBox(NoWheelComboBox):
class GenericRulesTable(QTableWidget): class GenericRulesTable(QTableWidget):
''' '''
Generic methods for managing rows Generic methods for managing rows in a QTableWidget
Add QTableWidget, controls to parent QGroupBox
placeholders for basic methods to be overriden
''' '''
DEBUG = False
MAXIMUM_TABLE_HEIGHT = 113
NAME_FIELD_WIDTH = 225
def __init__(self, parent_gb, object_name, rules, eligible_custom_fields, db): def __init__(self, parent_gb, object_name, rules, eligible_custom_fields, db):
self.rules = rules self.rules = rules
@ -464,13 +450,12 @@ class GenericRulesTable(QTableWidget):
self.layout = parent_gb.layout() self.layout = parent_gb.layout()
# Add ourselves to the layout # Add ourselves to the layout
#print("verticalHeader: %s" % dir(self.verticalHeader()))
sizePolicy = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Minimum) sizePolicy = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Minimum)
sizePolicy.setHorizontalStretch(0) sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0) sizePolicy.setVerticalStretch(0)
#sizePolicy.setHeightForWidth(self.sizePolicy().hasHeightForWidth()) #sizePolicy.setHeightForWidth(self.sizePolicy().hasHeightForWidth())
self.setSizePolicy(sizePolicy) self.setSizePolicy(sizePolicy)
self.setMaximumSize(QSize(16777215, 113)) self.setMaximumSize(QSize(16777215, self.MAXIMUM_TABLE_HEIGHT))
self.setColumnCount(0) self.setColumnCount(0)
self.setRowCount(0) self.setRowCount(0)
@ -481,6 +466,9 @@ class GenericRulesTable(QTableWidget):
self._init_controls() self._init_controls()
# Hook check_box changes
QObject.connect(self, SIGNAL('cellChanged(int,int)'), self.enabled_state_changed)
def _init_controls(self): def _init_controls(self):
# Add the control set # Add the control set
vbl = QVBoxLayout() vbl = QVBoxLayout()
@ -517,6 +505,8 @@ class GenericRulesTable(QTableWidget):
def add_row(self): def add_row(self):
self.setFocus() self.setFocus()
row = self.last_row_selected + 1 row = self.last_row_selected + 1
if self.DEBUG:
print("%s:add_row(): at row: %d" % (self.objectName(), row))
self.insertRow(row) self.insertRow(row)
self.populate_table_row(row, self.create_blank_row_data()) self.populate_table_row(row, self.create_blank_row_data())
self.select_and_scroll_to_row(row) self.select_and_scroll_to_row(row)
@ -524,19 +514,10 @@ class GenericRulesTable(QTableWidget):
# In case table was empty # In case table was empty
self.horizontalHeader().setStretchLastSection(True) self.horizontalHeader().setStretchLastSection(True)
def convert_row_to_data(self):
'''
override
'''
pass
def create_blank_row_data(self):
'''
override
'''
pass
def delete_row(self): def delete_row(self):
if self.DEBUG:
print("%s:delete_row()" % self.objectName())
self.setFocus() self.setFocus()
rows = self.last_rows_selected rows = self.last_rows_selected
if len(rows) == 0: if len(rows) == 0:
@ -545,10 +526,11 @@ class GenericRulesTable(QTableWidget):
first = rows[0].row() + 1 first = rows[0].row() + 1
last = rows[-1].row() + 1 last = rows[-1].row() + 1
message = _('Are you sure you want to delete rule %d?') % first first_rule_name = unicode(self.cellWidget(first-1,self.COLUMNS['NAME']['ordinal']).text()).strip()
message = _("Are you sure you want to delete '%s'?") % (first_rule_name)
if len(rows) > 1: if len(rows) > 1:
message = _('Are you sure you want to delete rules %d-%d?') % (first, last) message = _('Are you sure you want to delete rules #%d-%d?') % (first, last)
if not question_dialog(self, _('Are you sure?'), message, show_copy_button=False): if not question_dialog(self, _('Delete Rule'), message, show_copy_button=False):
return return
first_sel_row = self.currentRow() first_sel_row = self.currentRow()
for selrow in reversed(rows): for selrow in reversed(rows):
@ -558,17 +540,24 @@ class GenericRulesTable(QTableWidget):
elif self.rowCount() > 0: elif self.rowCount() > 0:
self.select_and_scroll_to_row(first_sel_row - 1) self.select_and_scroll_to_row(first_sel_row - 1)
def enabled_state_changed(self, row, col):
    '''
    React to a changed cell: when the cell lies in the ENABLED column,
    select and scroll to its row. Changes in other columns are ignored.
    '''
    enabled_col = self.COLUMNS['ENABLED']['ordinal']
    if col != enabled_col:
        return
    self.select_and_scroll_to_row(row)
    if self.DEBUG:
        print("%s:enabled_state_changed(): row %d col %d" %
                (self.objectName(), row, col))
def focusInEvent(self,e):
    '''
    Focus-in handler; only emits a trace line when DEBUG is set.
    The event ``e`` itself is not inspected.
    '''
    if not self.DEBUG:
        return
    print("%s:focusInEvent()" % self.objectName())
def focusOutEvent(self,e): def focusOutEvent(self,e):
# Override of QTableWidget method - clear selection when table loses focus # Override of QTableWidget method - clear selection when table loses focus
self.last_row_selected = self.currentRow() self.last_row_selected = self.currentRow()
self.last_rows_selected = self.selectionModel().selectedRows() self.last_rows_selected = self.selectionModel().selectedRows()
self.clearSelection() self.clearSelection()
if self.DEBUG:
def get_data(self): print("%s:focusOutEvent(): self.last_row_selected: %d" % (self.objectName(),self.last_row_selected))
'''
override
'''
pass
def move_row_down(self): def move_row_down(self):
self.setFocus() self.setFocus()
@ -583,6 +572,8 @@ class GenericRulesTable(QTableWidget):
for selrow in reversed(rows): for selrow in reversed(rows):
dest_row = selrow.row() + 1 dest_row = selrow.row() + 1
src_row = selrow.row() src_row = selrow.row()
if self.DEBUG:
print("%s:move_row_down() %d -> %d" % (self.objectName(),src_row, dest_row))
# Save the contents of the destination row # Save the contents of the destination row
saved_data = self.convert_row_to_data(dest_row) saved_data = self.convert_row_to_data(dest_row)
@ -596,11 +587,9 @@ class GenericRulesTable(QTableWidget):
# Populate it with the saved data # Populate it with the saved data
self.populate_table_row(src_row, saved_data) self.populate_table_row(src_row, saved_data)
self.blockSignals(False)
scroll_to_row = last_sel_row + 1 scroll_to_row = last_sel_row + 1
if scroll_to_row < self.rowCount() - 1:
scroll_to_row = scroll_to_row + 1
self.select_and_scroll_to_row(scroll_to_row) self.select_and_scroll_to_row(scroll_to_row)
self.blockSignals(False)
def move_row_up(self): def move_row_up(self):
self.setFocus() self.setFocus()
@ -611,7 +600,11 @@ class GenericRulesTable(QTableWidget):
if first_sel_row <= 0: if first_sel_row <= 0:
return return
self.blockSignals(True) self.blockSignals(True)
for selrow in rows: for selrow in rows:
if self.DEBUG:
print("%s:move_row_up() %d -> %d" % (self.objectName(),selrow.row(), selrow.row()-1))
# Save the row above # Save the row above
saved_data = self.convert_row_to_data(selrow.row() - 1) saved_data = self.convert_row_to_data(selrow.row() - 1)
@ -621,33 +614,92 @@ class GenericRulesTable(QTableWidget):
# Delete the row above # Delete the row above
self.removeRow(selrow.row() - 1) self.removeRow(selrow.row() - 1)
self.blockSignals(False)
scroll_to_row = first_sel_row - 1 scroll_to_row = first_sel_row
if scroll_to_row > 0: if scroll_to_row > 0:
scroll_to_row = scroll_to_row - 1 scroll_to_row = scroll_to_row - 1
self.select_and_scroll_to_row(scroll_to_row) self.select_and_scroll_to_row(scroll_to_row)
self.blockSignals(False)
def populate_table_row(self): def populate_table(self):
''' # Format of rules list is different if default values vs retrieved JSON
override # Hack to normalize list style
''' rules = self.rules
pass if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def resize_name(self, scale): def resize_name(self):
#current_width = self.columnWidth(1) self.setColumnWidth(1, self.NAME_FIELD_WIDTH)
#self.setColumnWidth(1, min(225,int(current_width * scale)))
self.setColumnWidth(1, 225)
def rule_name_edited(self): def rule_name_edited(self):
if self.DEBUG:
print("%s:rule_name_edited()" % self.objectName())
current_row = self.currentRow() current_row = self.currentRow()
self.cellWidget(current_row,1).home(False) self.cellWidget(current_row,1).home(False)
self.setFocus()
self.select_and_scroll_to_row(current_row) self.select_and_scroll_to_row(current_row)
def select_and_scroll_to_row(self, row): def select_and_scroll_to_row(self, row):
self.setFocus()
self.selectRow(row) self.selectRow(row)
self.scrollToItem(self.currentItem()) self.scrollToItem(self.currentItem())
self.last_row_selected = self.currentRow()
self.last_rows_selected = self.selectionModel().selectedRows()
def _source_index_changed(self, combo):
    '''
    Find the row whose FIELD cell holds ``combo`` and hand off to
    source_index_changed() for that row.

    Does nothing when ``combo`` is not in the FIELD column of any row
    (which includes an empty table), so no unrelated row is refreshed.
    '''
    field_col = self.COLUMNS['FIELD']['ordinal']
    # Figure out which row we're in
    for row in range(self.rowCount()):
        if self.cellWidget(row, field_col) is combo:
            break
    else:
        # Loop ended without a match: there is no row to update
        return
    if self.DEBUG:
        print("%s:_source_index_changed(): calling source_index_changed with row: %d " %
                (self.objectName(), row))
    self.source_index_changed(combo, row)
def source_index_changed(self, combo, row, pattern=''):
    '''
    Populate the Pattern field of ``row`` based upon the Source field
    currently selected in ``combo``.

    combo:   Source combo box; its current text names the field
    row:     table row whose PATTERN cell receives a freshly built ComboBox
    pattern: handed to the new ComboBox as its third argument
             (presumably the entry to preselect -- confirm against ComboBox)
    '''
    # Convert once with unicode(): str() cannot encode non-ASCII field
    # names under Python 2, and eligible_custom_fields is looked up with
    # the unicode form anyway.
    source_field = unicode(combo.currentText())
    if source_field == '':
        values = []
    elif source_field == 'Tags':
        values = sorted(self.db.all_tags(), key=sort_key)
    else:
        custom_field = self.eligible_custom_fields[source_field]
        datatype = custom_field['datatype']
        if datatype in ['enumeration', 'text']:
            values = self.db.all_custom(self.db.field_metadata.key_to_label(
                                        custom_field['field']))
            values = sorted(values, key=sort_key)
        elif datatype in ['bool']:
            values = [_('True'),_('False'),_('unspecified')]
        elif datatype in ['composite']:
            values = [_('any value'),_('unspecified')]
        elif datatype in ['datetime']:
            values = [_('any date'),_('unspecified')]
        else:
            # No predefined choices for this datatype; offer an empty list
            # instead of failing on an unassigned name below
            values = []

    values_combo = ComboBox(self, values, pattern)
    values_combo.currentIndexChanged.connect(partial(self.values_index_changed, values_combo))
    self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)
    self.select_and_scroll_to_row(row)
def values_index_changed(self, combo):
    '''
    After a Pattern combo box edit, select the row whose PATTERN cell
    holds ``combo``. If no row holds it, nothing is selected.
    '''
    pattern_col = self.COLUMNS['PATTERN']['ordinal']
    found_row = None
    for row in range(self.rowCount()):
        if self.cellWidget(row, pattern_col) is combo:
            found_row = row
            self.select_and_scroll_to_row(row)
            break

    if self.DEBUG:
        # Report only a row that was actually matched; the loop variable is
        # undefined for an empty table and misleading when nothing matched
        if found_row is None:
            print("%s:values_index_changed(): combo not found" % self.objectName())
        else:
            print("%s:values_index_changed(): row %d " %
                    (self.objectName(), found_row))
class ExclusionRules(GenericRulesTable): class ExclusionRules(GenericRulesTable):
@ -658,12 +710,13 @@ class ExclusionRules(GenericRulesTable):
def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db): def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db):
super(ExclusionRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db) super(ExclusionRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db)
self.setObjectName("exclusion_rules_table")
self._init_table_widget() self._init_table_widget()
self._initialize() self._initialize()
def _init_table_widget(self): def _init_table_widget(self):
header_labels = [self.COLUMNS[index]['name'] \ header_labels = [self.COLUMNS[index]['name'] \
for index in sorted(self.COLUMNS.keys(), key=lambda c: self.COLUMNS[c]['ordinal'])] for index in sorted(self.COLUMNS.keys(), key=lambda c: self.COLUMNS[c]['ordinal'])]
self.setColumnCount(len(header_labels)) self.setColumnCount(len(header_labels))
self.setHorizontalHeaderLabels(header_labels) self.setHorizontalHeaderLabels(header_labels)
self.setSortingEnabled(False) self.setSortingEnabled(False)
@ -672,7 +725,7 @@ class ExclusionRules(GenericRulesTable):
def _initialize(self): def _initialize(self):
self.populate_table() self.populate_table()
self.resizeColumnsToContents() self.resizeColumnsToContents()
self.resize_name(1.5) self.resize_name()
self.horizontalHeader().setStretchLastSection(True) self.horizontalHeader().setStretchLastSection(True)
self.clearSelection() self.clearSelection()
@ -706,20 +759,6 @@ class ExclusionRules(GenericRulesTable):
'pattern':data['pattern']}) 'pattern':data['pattern']})
return data_items return data_items
def populate_table(self):
    """Fill the table with one row per rule, ordered by each rule's 'ordinal'.

    Every inserted row is selected/scrolled to and then populated via
    populate_table_row(); row 0 is selected once all rows are in place.
    """
    rule_set = self.rules
    # The format of the rules list differs between default values and
    # retrieved JSON: it may be wrapped in an outer list. Unwrap that
    # form so both are handled the same way.
    if rule_set:
        if type(rule_set[0]) is list:
            rule_set = rule_set[0]

    self.setFocus()

    ordered_rules = list(rule_set)
    ordered_rules.sort(key=lambda entry: entry['ordinal'])
    for index, entry in enumerate(ordered_rules):
        self.insertRow(index)
        self.select_and_scroll_to_row(index)
        self.populate_table_row(index, entry)

    self.selectRow(0)
def populate_table_row(self, row, data): def populate_table_row(self, row, data):
def set_rule_name_in_row(row, col, name=''): def set_rule_name_in_row(row, col, name=''):
@ -730,7 +769,7 @@ class ExclusionRules(GenericRulesTable):
def set_source_field_in_row(row, col, field=''): def set_source_field_in_row(row, col, field=''):
source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field) source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field)
source_combo.currentIndexChanged.connect(partial(self.source_index_changed, source_combo, row)) source_combo.currentIndexChanged.connect(partial(self._source_index_changed, source_combo))
self.setCellWidget(row, col, source_combo) self.setCellWidget(row, col, source_combo)
return source_combo return source_combo
@ -738,7 +777,8 @@ class ExclusionRules(GenericRulesTable):
self.blockSignals(True) self.blockSignals(True)
# Enabled # Enabled
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled'])) check_box = CheckableTableWidgetItem(data['enabled'])
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], check_box)
# Rule name # Rule name
set_rule_name_in_row(row, self.COLUMNS['NAME']['ordinal'], name=data['name']) set_rule_name_in_row(row, self.COLUMNS['NAME']['ordinal'], name=data['name'])
@ -748,32 +788,10 @@ class ExclusionRules(GenericRulesTable):
# Pattern # Pattern
# The contents of the Pattern field is driven by the Source field # The contents of the Pattern field is driven by the Source field
self.source_index_changed(source_combo, row, self.COLUMNS['PATTERN']['ordinal'], pattern=data['pattern']) self.source_index_changed(source_combo, row, pattern=data['pattern'])
self.blockSignals(False) self.blockSignals(False)
def source_index_changed(self, combo, row, col, pattern=''):
    """Rebuild the Pattern cell of ``row`` from the selected Source field.

    combo   -- Source combo box; its current text selects the field.
    row     -- table row whose Pattern cell widget is replaced.
    col     -- unused; the Pattern column is always looked up in
               self.COLUMNS. Kept so existing callers keep working.
    pattern -- passed through to the new ComboBox
               (NOTE(review): presumably the entry to preselect -- confirm
               against ComboBox).
    """
    # unicode() instead of str(): str() raises UnicodeEncodeError on
    # non-ASCII field names under Python 2, and the text is used as a
    # unicode dictionary key below anyway.
    source_field = unicode(combo.currentText())

    # Start with no choices so that an empty selection, or a datatype not
    # handled below, yields an empty combo box instead of leaving `values`
    # unbound (UnboundLocalError).
    values = []
    if source_field == 'Tags':
        values = sorted(self.db.all_tags(), key=sort_key)
    elif source_field != '':
        field_md = self.eligible_custom_fields[source_field]
        datatype = field_md['datatype']
        if datatype in ('enumeration', 'text'):
            label = self.db.field_metadata.key_to_label(field_md['field'])
            values = sorted(self.db.all_custom(label), key=sort_key)
        elif datatype == 'bool':
            values = ['True','False','unspecified']
        elif datatype == 'composite':
            values = ['any value','unspecified']
        elif datatype == 'datetime':
            values = ['any date','unspecified']

    values_combo = ComboBox(self, values, pattern)
    self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)
class PrefixRules(GenericRulesTable): class PrefixRules(GenericRulesTable):
COLUMNS = { 'ENABLED':{'ordinal': 0, 'name': ''}, COLUMNS = { 'ENABLED':{'ordinal': 0, 'name': ''},
@ -784,12 +802,13 @@ class PrefixRules(GenericRulesTable):
def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db): def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db):
super(PrefixRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db) super(PrefixRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db)
self.setObjectName("prefix_rules_table")
self._init_table_widget() self._init_table_widget()
self._initialize() self._initialize()
def _init_table_widget(self): def _init_table_widget(self):
header_labels = [self.COLUMNS[index]['name'] \ header_labels = [self.COLUMNS[index]['name'] \
for index in sorted(self.COLUMNS.keys(), key=lambda c: self.COLUMNS[c]['ordinal'])] for index in sorted(self.COLUMNS.keys(), key=lambda c: self.COLUMNS[c]['ordinal'])]
self.setColumnCount(len(header_labels)) self.setColumnCount(len(header_labels))
self.setHorizontalHeaderLabels(header_labels) self.setHorizontalHeaderLabels(header_labels)
self.setSortingEnabled(False) self.setSortingEnabled(False)
@ -799,14 +818,14 @@ class PrefixRules(GenericRulesTable):
self.generate_prefix_list() self.generate_prefix_list()
self.populate_table() self.populate_table()
self.resizeColumnsToContents() self.resizeColumnsToContents()
self.resize_name(1.5) self.resize_name()
self.horizontalHeader().setStretchLastSection(True) self.horizontalHeader().setStretchLastSection(True)
self.clearSelection() self.clearSelection()
def convert_row_to_data(self, row): def convert_row_to_data(self, row):
data = self.create_blank_row_data() data = self.create_blank_row_data()
data['ordinal'] = row data['ordinal'] = row
data['enabled'] = self.item(row,0).checkState() == Qt.Checked data['enabled'] = self.item(row,self.COLUMNS['ENABLED']['ordinal']).checkState() == Qt.Checked
data['name'] = unicode(self.cellWidget(row,self.COLUMNS['NAME']['ordinal']).text()).strip() data['name'] = unicode(self.cellWidget(row,self.COLUMNS['NAME']['ordinal']).text()).strip()
data['prefix'] = unicode(self.cellWidget(row,self.COLUMNS['PREFIX']['ordinal']).currentText()).strip() data['prefix'] = unicode(self.cellWidget(row,self.COLUMNS['PREFIX']['ordinal']).currentText()).strip()
data['field'] = unicode(self.cellWidget(row,self.COLUMNS['FIELD']['ordinal']).currentText()).strip() data['field'] = unicode(self.cellWidget(row,self.COLUMNS['FIELD']['ordinal']).currentText()).strip()
@ -970,20 +989,6 @@ class PrefixRules(GenericRulesTable):
'prefix':data['prefix']}) 'prefix':data['prefix']})
return data_items return data_items
def populate_table(self):
    """Fill the table with one row per rule, ordered by each rule's 'ordinal'.

    Every inserted row is selected/scrolled to and then populated via
    populate_table_row(); row 0 is selected once all rows are in place.
    """
    rule_set = self.rules
    # The format of the rules list differs between default values and
    # retrieved JSON: it may be wrapped in an outer list. Unwrap that
    # form so both are handled the same way.
    if rule_set:
        if type(rule_set[0]) is list:
            rule_set = rule_set[0]

    self.setFocus()

    ordered_rules = list(rule_set)
    ordered_rules.sort(key=lambda entry: entry['ordinal'])
    for index, entry in enumerate(ordered_rules):
        self.insertRow(index)
        self.select_and_scroll_to_row(index)
        self.populate_table_row(index, entry)

    self.selectRow(0)
def populate_table_row(self, row, data): def populate_table_row(self, row, data):
def set_prefix_field_in_row(row, col, field=''): def set_prefix_field_in_row(row, col, field=''):
@ -998,14 +1003,12 @@ class PrefixRules(GenericRulesTable):
def set_source_field_in_row(row, col, field=''): def set_source_field_in_row(row, col, field=''):
source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field) source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field)
source_combo.currentIndexChanged.connect(partial(self.source_index_changed, source_combo, row)) source_combo.currentIndexChanged.connect(partial(self._source_index_changed, source_combo))
self.setCellWidget(row, col, source_combo) self.setCellWidget(row, col, source_combo)
return source_combo return source_combo
# Entry point # Entry point
self.blockSignals(True) self.blockSignals(True)
#print("prefix_rules_populate_table_row processing rule:\n%s\n" % data)
# Enabled # Enabled
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled'])) self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled']))
@ -1021,31 +1024,7 @@ class PrefixRules(GenericRulesTable):
# Pattern # Pattern
# The contents of the Pattern field is driven by the Source field # The contents of the Pattern field is driven by the Source field
self.source_index_changed(source_combo, row, self.COLUMNS['PATTERN']['ordinal'], pattern=data['pattern']) self.source_index_changed(source_combo, row, pattern=data['pattern'])
self.blockSignals(False) self.blockSignals(False)
def source_index_changed(self, combo, row, col, pattern=''):
    """Rebuild the Pattern cell of ``row`` from the selected Source field.

    combo   -- Source combo box; its current text selects the field.
    row     -- table row whose Pattern cell widget is replaced.
    col     -- unused; the Pattern column is always looked up in
               self.COLUMNS. Kept so existing callers keep working.
    pattern -- passed through to the new ComboBox
               (NOTE(review): presumably the entry to preselect -- confirm
               against ComboBox).
    """
    # unicode() instead of str(): str() raises UnicodeEncodeError on
    # non-ASCII field names under Python 2, and the text is used as a
    # unicode dictionary key below anyway.
    source_field = unicode(combo.currentText())

    # Start with no choices so that an empty selection, or a datatype not
    # handled below, yields an empty combo box instead of leaving `values`
    # unbound (UnboundLocalError).
    values = []
    if source_field == 'Tags':
        values = sorted(self.db.all_tags(), key=sort_key)
    elif source_field != '':
        field_md = self.eligible_custom_fields[source_field]
        datatype = field_md['datatype']
        if datatype in ('enumeration', 'text'):
            label = self.db.field_metadata.key_to_label(field_md['field'])
            values = sorted(self.db.all_custom(label), key=sort_key)
        elif datatype == 'bool':
            values = ['True','False','unspecified']
        elif datatype == 'composite':
            values = ['any value','unspecified']
        elif datatype == 'datetime':
            values = ['any date','unspecified']

    values_combo = ComboBox(self, values, pattern)
    self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)

View File

@ -25,7 +25,7 @@ class PluginWidget(Widget, Ui_Form):
'mobi_keep_original_images', 'mobi_keep_original_images',
'mobi_ignore_margins', 'mobi_toc_at_start', 'mobi_ignore_margins', 'mobi_toc_at_start',
'dont_compress', 'no_inline_toc', 'share_not_sync', 'dont_compress', 'no_inline_toc', 'share_not_sync',
'personal_doc']#, 'mobi_navpoints_only_deepest'] 'personal_doc', 'mobi_file_type']
) )
self.db, self.book_id = db, book_id self.db, self.book_id = db, book_id
@ -48,6 +48,7 @@ class PluginWidget(Widget, Ui_Form):
self.font_family_model = font_family_model self.font_family_model = font_family_model
self.opt_masthead_font.setModel(self.font_family_model) self.opt_masthead_font.setModel(self.font_family_model)
''' '''
self.opt_mobi_file_type.addItems(['old', 'both', 'new'])
self.initialize_options(get_option, get_help, db, book_id) self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,80 +14,10 @@
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="8" column="0" colspan="2"> <item row="0" column="0">
<widget class="QGroupBox" name="groupBox"> <widget class="QCheckBox" name="opt_no_inline_toc">
<property name="title">
<string>Kindle options</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QLabel" name="label_3">
<property name="text">
<string>Personal Doc tag:</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="opt_personal_doc"/>
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="opt_share_not_sync">
<property name="text">
<string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_mobi_toc_at_start">
<property name="text"> <property name="text">
<string>Put generated Table of Contents at &amp;start of book instead of end</string> <string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_mobi_ignore_margins">
<property name="text">
<string>Ignore &amp;margins</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
</property> </property>
</widget> </widget>
</item> </item>
@ -104,17 +34,24 @@
<item row="1" column="1"> <item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/> <widget class="QLineEdit" name="opt_toc_title"/>
</item> </item>
<item row="6" column="0"> <item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_dont_compress"> <widget class="QCheckBox" name="opt_mobi_toc_at_start">
<property name="text"> <property name="text">
<string>Disable compression of the file contents</string> <string>Put generated Table of Contents at &amp;start of book instead of end</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="0" column="0"> <item row="3" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc"> <widget class="QCheckBox" name="opt_mobi_ignore_margins">
<property name="text"> <property name="text">
<string>Do not add Table of Contents to book</string> <string>Ignore &amp;margins</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
</property> </property>
</widget> </widget>
</item> </item>
@ -125,6 +62,55 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Kindle options</string>
</property>
<layout class="QFormLayout" name="formLayout">
<property name="fieldGrowthPolicy">
<enum>QFormLayout::ExpandingFieldsGrow</enum>
</property>
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>MOBI file &amp;type:</string>
</property>
<property name="buddy">
<cstring>opt_mobi_file_type</cstring>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="opt_mobi_file_type"/>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Personal Doc tag:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_personal_doc"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_share_not_sync">
<property name="text">
<string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View File

@ -529,6 +529,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.remove_button.clicked.connect(self.s_r_remove_query) self.remove_button.clicked.connect(self.s_r_remove_query)
self.queries = JSONConfig("search_replace_queries") self.queries = JSONConfig("search_replace_queries")
self.saved_search_name = ''
self.query_field.addItem("") self.query_field.addItem("")
self.query_field_values = sorted([q for q in self.queries], key=sort_key) self.query_field_values = sorted([q for q in self.queries], key=sort_key)
self.query_field.addItems(self.query_field_values) self.query_field.addItems(self.query_field_values)
@ -1034,11 +1035,16 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.queries.commit() self.queries.commit()
def s_r_save_query(self, *args): def s_r_save_query(self, *args):
dex = self.query_field_values.index(self.saved_search_name) names = ['']
names.extend(self.query_field_values)
try:
dex = names.index(self.saved_search_name)
except:
dex = 0
name = '' name = ''
while not name: while not name:
name, ok = QInputDialog.getItem(self, _('Save search/replace'), name, ok = QInputDialog.getItem(self, _('Save search/replace'),
_('Search/replace name:'), self.query_field_values, dex, True) _('Search/replace name:'), names, dex, True)
if not ok: if not ok:
return return
if not name: if not name:
@ -1086,6 +1092,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def s_r_query_change(self, item_name): def s_r_query_change(self, item_name):
if not item_name: if not item_name:
self.s_r_reset_query_fields() self.s_r_reset_query_fields()
self.saved_search_name = ''
return return
item = self.queries.get(unicode(item_name), None) item = self.queries.get(unicode(item_name), None)
if item is None: if item is None:

View File

@ -1241,17 +1241,18 @@ not multiple and the destination field is multiple</string>
<tabstop>search_mode</tabstop> <tabstop>search_mode</tabstop>
<tabstop>s_r_src_ident</tabstop> <tabstop>s_r_src_ident</tabstop>
<tabstop>s_r_template</tabstop> <tabstop>s_r_template</tabstop>
<tabstop>search_for</tabstop>
<tabstop>case_sensitive</tabstop>
<tabstop>replace_with</tabstop> <tabstop>replace_with</tabstop>
<tabstop>replace_func</tabstop> <tabstop>replace_func</tabstop>
<tabstop>destination_field</tabstop>
<tabstop>replace_mode</tabstop> <tabstop>replace_mode</tabstop>
<tabstop>comma_separated</tabstop> <tabstop>comma_separated</tabstop>
<tabstop>s_r_dst_ident</tabstop> <tabstop>s_r_dst_ident</tabstop>
<tabstop>results_count</tabstop> <tabstop>results_count</tabstop>
<tabstop>scrollArea11</tabstop>
<tabstop>destination_field</tabstop>
<tabstop>search_for</tabstop>
<tabstop>case_sensitive</tabstop>
<tabstop>starting_from</tabstop> <tabstop>starting_from</tabstop>
<tabstop>multiple_separator</tabstop>
<tabstop>scrollArea11</tabstop>
</tabstops> </tabstops>
<resources> <resources>
<include location="../../../../resources/images.qrc"/> <include location="../../../../resources/images.qrc"/>

View File

@ -310,8 +310,18 @@ class MetadataSingleDialogBase(ResizableDialog):
self.update_from_mi(mi) self.update_from_mi(mi)
def cover_from_format(self, *args): def cover_from_format(self, *args):
mi, ext = self.formats_manager.get_selected_format_metadata(self.db, try:
self.book_id) mi, ext = self.formats_manager.get_selected_format_metadata(self.db,
self.book_id)
except (IOError, OSError) as err:
if getattr(err, 'errno', None) == errno.EACCES: # Permission denied
import traceback
fname = err.filename if err.filename else 'file'
error_dialog(self, _('Permission denied'),
_('Could not open %s. Is it being used by another'
' program?')%fname, det_msg=traceback.format_exc(),
show=True)
return
if mi is None: if mi is None:
return return
cdata = None cdata = None

View File

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2012, Florent FAYOLLE <florent.fayolle69@gmail.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
from calibre.gui2.store.search_result import SearchResult
import unicodedata
#mimetypes.add_type('application/epub+zip', '.epub')
class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
    """Store plugin that queries the ebooksgratuits.com OpenSearch/OPDS feed."""

    open_search_url = 'http://www.ebooksgratuits.com/opds/opensearch.xml'
    web_url = 'http://www.ebooksgratuits.com/'

    def strip_accents(self, s):
        """Return ``s`` with all combining marks (category 'Mn') removed.

        The text is NFD-normalized first, so an accented letter is split
        into its base letter plus a mark and only the mark is dropped.
        """
        decomposed = unicodedata.normalize('NFD', s)
        kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
        return ''.join(kept)

    def search(self, query, max_results=10, timeout=60):
        """Yield results for the accent-stripped ``query`` that have downloads.

        Each yielded result is marked DRM-unlocked and priced at '$0.00'.
        """
        plain_query = self.strip_accents(unicode(query))
        found = OpenSearchOPDSStore.search(self, plain_query, max_results, timeout)
        for result in found:
            # NOTE(review): the flattened source does not show whether the
            # yield sits inside the downloads check; it is treated as nested
            # here, so results without downloads are skipped -- confirm.
            if not result.downloads:
                continue
            result.drm = SearchResult.DRM_UNLOCKED
            result.price = '$0.00'
            yield result

View File

@ -417,7 +417,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
vprefs.set('viewer_splitter_state', vprefs.set('viewer_splitter_state',
bytearray(self.splitter.saveState())) bytearray(self.splitter.saveState()))
vprefs['multiplier'] = self.view.multiplier vprefs['multiplier'] = self.view.multiplier
vprefs['in_paged_mode1'] = not self.action_toggle_paged_mode.isChecked() vprefs['in_paged_mode'] = not self.action_toggle_paged_mode.isChecked()
def restore_state(self): def restore_state(self):
state = vprefs.get('viewer_toolbar_state', None) state = vprefs.get('viewer_toolbar_state', None)
@ -434,8 +434,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
# specific location, ensure they are visible. # specific location, ensure they are visible.
self.tool_bar.setVisible(True) self.tool_bar.setVisible(True)
self.tool_bar2.setVisible(True) self.tool_bar2.setVisible(True)
self.action_toggle_paged_mode.setChecked(not vprefs.get('in_paged_mode1', self.action_toggle_paged_mode.setChecked(not vprefs.get('in_paged_mode',
False)) True))
self.toggle_paged_mode(self.action_toggle_paged_mode.isChecked(), self.toggle_paged_mode(self.action_toggle_paged_mode.isChecked(),
at_start=True) at_start=True)

View File

@ -440,8 +440,7 @@ class KindlePage(QWizardPage, KindleUI):
x = unicode(self.to_address.text()).strip() x = unicode(self.to_address.text()).strip()
parts = x.split('@') parts = x.split('@')
if (self.send_email_widget.set_email_settings(True) and len(parts) >= 2 if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
and parts[0]):
conf = smtp_prefs() conf = smtp_prefs()
accounts = conf.parse().accounts accounts = conf.parse().accounts
if not accounts: accounts = {} if not accounts: accounts = {}
@ -676,8 +675,9 @@ class LibraryPage(QWizardPage, LibraryUI):
self.language.blockSignals(True) self.language.blockSignals(True)
self.language.clear() self.language.clear()
from calibre.utils.localization import (available_translations, from calibre.utils.localization import (available_translations,
get_language, get_lang) get_language, get_lang, get_lc_messages_path)
lang = get_lang() lang = get_lang()
lang = get_lc_messages_path(lang) if lang else lang
if lang is None or lang not in available_translations(): if lang is None or lang not in available_translations():
lang = 'en' lang = 'en'
def get_esc_lang(l): def get_esc_lang(l):

View File

@ -341,7 +341,8 @@ class EPUB_MOBI(CatalogPlugin):
recommendations.append(('comments', '', OptionRecommendation.HIGH)) recommendations.append(('comments', '', OptionRecommendation.HIGH))
# >>> Use to debug generated catalog code before conversion <<< # >>> Use to debug generated catalog code before conversion <<<
#setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug")) if False:
setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug"))
dp = getattr(opts, 'debug_pipeline', None) dp = getattr(opts, 'debug_pipeline', None)
if dp is not None: if dp is not None:

View File

@ -1188,11 +1188,11 @@ Author '{0}':
current_series = book['series'] current_series = book['series']
pSeriesTag = Tag(soup,'p') pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series" pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series: if self.opts.generate_series:
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries', aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(book['series']))
re.sub('\s','',book['series']).lower())
aTag.insert(0, book['series']) aTag.insert(0, book['series'])
pSeriesTag.insert(0, aTag) pSeriesTag.insert(0, aTag)
else: else:
@ -1331,10 +1331,13 @@ Author '{0}':
current_series = new_entry['series'] current_series = new_entry['series']
pSeriesTag = Tag(soup,'p') pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series" pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series: if self.opts.generate_series:
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\W','',new_entry['series']).lower()) if self.letter_or_symbol(new_entry['series']) == self.SYMBOLS:
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(new_entry['series']))
aTag.insert(0, new_entry['series']) aTag.insert(0, new_entry['series'])
pSeriesTag.insert(0, aTag) pSeriesTag.insert(0, aTag)
else: else:
@ -1741,17 +1744,6 @@ Author '{0}':
body = soup.find('body') body = soup.find('body')
btc = 0 btc = 0
pTag = Tag(soup, "p")
pTag['style'] = 'display:none'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
body.insert(btc, pTag)
btc += 1
divTag = Tag(soup, "div") divTag = Tag(soup, "div")
dtc = 0 dtc = 0
current_letter = "" current_letter = ""
@ -1787,11 +1779,10 @@ Author '{0}':
current_series = book['series'] current_series = book['series']
pSeriesTag = Tag(soup,'p') pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series" pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
aTag = Tag(soup, 'a') aTag = Tag(soup, 'a')
if self.letter_or_symbol(book['series']): aTag['id'] = self.generateSeriesAnchor(book['series'])
aTag['id'] = "symbol_%s_series" % re.sub('\W','',book['series']).lower()
else:
aTag['id'] = "%s_series" % re.sub('\W','',book['series']).lower()
pSeriesTag.insert(0,aTag) pSeriesTag.insert(0,aTag)
pSeriesTag.insert(1,NavigableString('%s' % book['series'])) pSeriesTag.insert(1,NavigableString('%s' % book['series']))
divTag.insert(dtc,pSeriesTag) divTag.insert(dtc,pSeriesTag)
@ -1847,19 +1838,23 @@ Author '{0}':
divTag.insert(dtc, pBookTag) divTag.insert(dtc, pBookTag)
dtc += 1 dtc += 1
pTag = Tag(soup, "p")
pTag['class'] = 'title'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
if not self.__generateForKindle: if not self.__generateForKindle:
# Insert the <h2> tag with book_count at the head # Insert the <h2> tag with book_count at the head
#<h2><a name="byseries" id="byseries"></a>By Series</h2>
pTag = Tag(soup, "p")
pTag['class'] = 'title'
aTag = Tag(soup, "a") aTag = Tag(soup, "a")
anchor_name = friendly_name.lower() anchor_name = friendly_name.lower()
aTag['id'] = anchor_name.replace(" ","") aTag['id'] = anchor_name.replace(" ","")
pTag.insert(0,aTag) pTag.insert(0,aTag)
#h2Tag.insert(1,NavigableString('%s (%d)' % (friendly_name, series_count)))
pTag.insert(1,NavigableString('%s' % friendly_name)) pTag.insert(1,NavigableString('%s' % friendly_name))
body.insert(btc,pTag) body.insert(btc,pTag)
btc += 1 btc += 1
# Add the divTag to the body # Add the divTag to the body
body.insert(btc, divTag) body.insert(btc, divTag)
@ -3353,15 +3348,23 @@ Author '{0}':
return codeTag return codeTag
else: else:
spanTag = Tag(soup, "span") spanTag = Tag(soup, "span")
spanTag['class'] = "prefix"
# color:white was the original technique used to align columns.
# The new technique is to float the prefix left with CSS.
if prefix_char is None: if prefix_char is None:
spanTag['style'] = "color:white" if True:
prefix_char = self.defaultPrefix prefix_char = "&nbsp;"
else:
del spanTag['class']
spanTag['style'] = "color:white"
prefix_char = self.defaultPrefix
spanTag.insert(0,NavigableString(prefix_char)) spanTag.insert(0,NavigableString(prefix_char))
return spanTag return spanTag
def generateAuthorAnchor(self, author): def generateAuthorAnchor(self, author):
# Strip white space to '' # Generate a legal XHTML id/href string
return re.sub("\W","", author) return re.sub("\W","", ascii_text(author))
def generateFormatArgs(self, book): def generateFormatArgs(self, book):
series_index = str(book['series_index']) series_index = str(book['series_index'])
@ -3436,10 +3439,11 @@ Author '{0}':
current_series = book['series'] current_series = book['series']
pSeriesTag = Tag(soup,'p') pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series" pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series: if self.opts.generate_series:
aTag = Tag(soup,'a') aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries', aTag['href'] = "%s.html#%s" % ('BySeries', self.generateSeriesAnchor(book['series']))
re.sub('\W','',book['series']).lower())
aTag.insert(0, book['series']) aTag.insert(0, book['series'])
pSeriesTag.insert(0, aTag) pSeriesTag.insert(0, aTag)
else: else:
@ -3641,12 +3645,7 @@ Author '{0}':
if aTag: if aTag:
if book['series']: if book['series']:
if self.opts.generate_series: if self.opts.generate_series:
if self.letter_or_symbol(book['series']): aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(book['series']))
aTag['href'] = "%s.html#symbol_%s_series" % ('BySeries',
re.sub('\W','',book['series']).lower())
else:
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\s','',book['series']).lower())
else: else:
aTag.extract() aTag.extract()
@ -3780,6 +3779,13 @@ Author '{0}':
pass pass
return rating return rating
def generateSeriesAnchor(self, series):
    """Generate a legal XHTML id/href string for a series title.

    Titles that letter_or_symbol() classifies as self.SYMBOLS become
    'symbol_<name>_series'; all others are passed through ascii_text()
    first and become '<name>_series'. In both cases every character
    matched by \\W is removed and the result is lower-cased.
    """
    # Raw strings: '\W' in a plain literal is an invalid escape sequence.
    if self.letter_or_symbol(series) == self.SYMBOLS:
        return "symbol_%s_series" % re.sub(r'\W', '', series).lower()
    return "%s_series" % re.sub(r'\W', '', ascii_text(series)).lower()
def generateShortDescription(self, description, dest=None): def generateShortDescription(self, description, dest=None):
# Truncate the description, on word boundaries if necessary # Truncate the description, on word boundaries if necessary
# Possible destinations: # Possible destinations:

View File

@ -11,7 +11,7 @@ import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \
from collections import defaultdict from collections import defaultdict
import threading, random import threading, random
from itertools import repeat from itertools import repeat
from math import ceil from math import ceil, floor
from calibre import prints, force_unicode from calibre import prints, force_unicode
from calibre.ebooks.metadata import (title_sort, author_to_author_sort, from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
@ -640,12 +640,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if name and name != fname: if name and name != fname:
changed = True changed = True
break break
if path == current_path and not changed:
return
tpath = os.path.join(self.library_path, *path.split('/')) tpath = os.path.join(self.library_path, *path.split('/'))
if not os.path.exists(tpath): if not os.path.exists(tpath):
os.makedirs(tpath) os.makedirs(tpath)
if path == current_path and not changed:
return
spath = os.path.join(self.library_path, *current_path.split('/')) spath = os.path.join(self.library_path, *current_path.split('/'))
if current_path and os.path.exists(spath): # Migrate existing files if current_path and os.path.exists(spath): # Migrate existing files
@ -1150,7 +1150,16 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
`data`: Can be either a QImage, QPixmap, file object or bytestring `data`: Can be either a QImage, QPixmap, file object or bytestring
''' '''
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') base_path = os.path.join(self.library_path, self.path(id,
index_is_id=True))
if not os.path.exists(base_path):
self.set_path(id, index_is_id=True)
base_path = os.path.join(self.library_path, self.path(id,
index_is_id=True))
self.dirtied([id])
path = os.path.join(base_path, 'cover.jpg')
if callable(getattr(data, 'save', None)): if callable(getattr(data, 'save', None)):
data.save(path) data.save(path)
else: else:
@ -2080,7 +2089,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return 1.0 return 1.0
series_indices = [x[0] for x in series_indices] series_indices = [x[0] for x in series_indices]
if tweaks['series_index_auto_increment'] == 'next': if tweaks['series_index_auto_increment'] == 'next':
return series_indices[-1] + 1 return floor(series_indices[-1]) + 1
if tweaks['series_index_auto_increment'] == 'first_free': if tweaks['series_index_auto_increment'] == 'first_free':
for i in range(1, 10000): for i in range(1, 10000):
if i not in series_indices: if i not in series_indices:

View File

@ -42,7 +42,7 @@ class Restore(Thread):
self.src_library_path = os.path.abspath(library_path) self.src_library_path = os.path.abspath(library_path)
self.progress_callback = progress_callback self.progress_callback = progress_callback
self.db_id_regexp = re.compile(r'^.* \((\d+)\)$') self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
self.bad_ext_pat = re.compile(r'[^a-z0-9]+') self.bad_ext_pat = re.compile(r'[^a-z0-9_]+')
if not callable(self.progress_callback): if not callable(self.progress_callback):
self.progress_callback = lambda x, y: x self.progress_callback = lambda x, y: x
self.dirs = [] self.dirs = []

View File

@ -22,6 +22,34 @@ def available_translations():
_available_translations = [x for x in stats if stats[x] > 0.1] _available_translations = [x for x in stats if stats[x] > 0.1]
return _available_translations return _available_translations
def get_system_locale():
    '''
    Return the locale name reported by the operating system, or a false
    value (None or an empty string) if none could be determined.

    The sources are tried in order, stopping at the first one that yields
    a value:

      1. On Windows only, ``get_windows_user_locale_name()`` from
         ``calibre.constants``.
      2. ``locale.getdefaultlocale()`` with the environment variables
         LANGUAGE, LC_ALL, LC_CTYPE, LC_MESSAGES and LANG.
      3. The raw value of the LANG environment variable.

    A non-empty result is normalised: every ``-`` is replaced by ``_`` and
    only the first two ``_`` separated components are kept.

    Detection is best effort: a failure in one source is ignored and the
    next source is tried, so this function does not raise because of a
    broken locale configuration.
    '''
    from calibre.constants import iswindows
    lang = None
    if iswindows:
        try:
            from calibre.constants import get_windows_user_locale_name
            # Treat None, empty and whitespace-only names as "not found"
            lang = (get_windows_user_locale_name() or '').strip() or None
        except Exception:
            pass  # Windows XP does not have the GetUserDefaultLocaleName fn
    if lang is None:
        try:
            lang = locale.getdefaultlocale(['LANGUAGE', 'LC_ALL', 'LC_CTYPE',
                'LC_MESSAGES', 'LANG'])[0]
        except Exception:
            pass  # This happens on Ubuntu apparently
    if lang is None:
        # Needed for OS X. Stays None when LANG is not set.
        lang = os.environ.get('LANG')
    if lang:
        lang = lang.replace('-', '_')
        lang = '_'.join(lang.split('_')[:2])
    return lang
def get_lang(): def get_lang():
'Try to figure out what language to display the interface in' 'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs from calibre.utils.config_base import prefs
@ -30,15 +58,11 @@ def get_lang():
if lang: if lang:
return lang return lang
try: try:
lang = locale.getdefaultlocale(['LANGUAGE', 'LC_ALL', 'LC_CTYPE', lang = get_system_locale()
'LC_MESSAGES', 'LANG'])[0]
except: except:
pass # This happens on Ubuntu apparently import traceback
if lang is None and os.environ.has_key('LANG'): # Needed for OS X traceback.print_exc()
try: lang = None
lang = os.environ['LANG']
except:
pass
if lang: if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang) match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match: if match:
@ -55,7 +79,7 @@ def get_lc_messages_path(lang):
if lang in available_translations(): if lang in available_translations():
hlang = lang hlang = lang
else: else:
xlang = lang.split('_')[0] xlang = lang.split('_')[0].lower()
if xlang in available_translations(): if xlang in available_translations():
hlang = xlang hlang = xlang
return hlang return hlang