added ebooksgratuits.com in the stores of "Get Books"

This commit is contained in:
Florent FAYOLLE 2012-08-16 22:37:21 +02:00
commit e7280044d5
44 changed files with 2823 additions and 926 deletions

View File

@ -16,6 +16,7 @@ class BusinessSpectator(BasicNewsRecipe):
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
auto_cleanup = True
#delay = 1
use_embedded_content = False
encoding = 'utf8'
@ -32,11 +33,11 @@ class BusinessSpectator(BasicNewsRecipe):
,'linearize_tables': False
}
keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
#keep_only_tags = [dict(id='storyHeader'), dict(id='body-html')]
remove_tags = [dict(attrs={'class':'hql'})]
#remove_tags = [dict(attrs={'class':'hql'})]
remove_attributes = ['width','height','style']
#remove_attributes = ['width','height','style']
feeds = [
('Top Stories', 'http://www.businessspectator.com.au/top-stories.rss'),
@ -46,3 +47,4 @@ class BusinessSpectator(BasicNewsRecipe):
('Daily Dossier', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=kgb&cat=dossier'),
('Australia', 'http://www.businessspectator.com.au/bs.nsf/RSS?readform&type=region&cat=australia'),
]

View File

@ -1,35 +1,320 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class CalgaryHerald(BasicNewsRecipe):
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
title = u'Calgary Herald'
oldest_article = 3
max_articles_per_feed = 100
url_prefix = 'http://www.calgaryherald.com'
description = u'News from Calgary, AB'
std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
logo_url = 'chlogo.jpg'
fp_tag = 'CAN_CH'
feeds = [
(u'News', u'http://rss.canada.com/get/?F233'),
(u'Calgary', u'http://www.calgaryherald.com/scripts/sp6query.aspx?catalog=cahr&tags=keyword|calgary&output=rss?link=http%3a%2f%2fwww.calgaryherald'),
(u'Alberta', u'http://www.calgaryherald.com/scripts/Sp6Query.aspx?catalog=CAHR&tags=Keyword|Alberta&output=rss?link=http%3A%2F%2Fwww.calgaryherald.com%2Fnews%2Falberta%2Findex.html'),
(u'Politics', u'http://rss.canada.com/get/?F7551'),
(u'National', u'http://rss.canada.com/get/?F7552'),
(u'World', u'http://rss.canada.com/get/?F7553'),
]
__author__ = 'rty'
pubisher = 'Calgary Herald'
description = 'Calgary, Alberta, Canada'
category = 'News, Calgary, Alberta, Canada'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
conversion_options = {'linearize_tables':True}
##masthead_url = 'http://www.calgaryherald.com/index.html'
keep_only_tags = [
dict(name='div', attrs={'id':'storyheader'}),
dict(name='div', attrs={'id':'storycontent'})
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
]
remove_tags_after = {'class':"story_tool_hr"}
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
articles = {}
ans = []
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import re
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),,
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
title = u'Edmonton Journal'
url_prefix = 'http://www.edmontonjournal.com'
description = u'News from Edmonton, AB'
fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following six lines for the Edmonton Journal
title = u'Edmonton Journal'
url_prefix = 'http://www.edmontonjournal.com'
description = u'News from Edmonton, AB'
std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
logo_url = 'ejlogo.jpg'
fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,48 +1,320 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Montreal Gazette
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following six lines for the Montreal Gazette
title = u'Montreal Gazette'
url_prefix = 'http://www.montrealgazette.com'
description = u'News from Montreal, QC'
std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
logo_url = 'mglogo.jpg'
fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
auto_cleanup = True
auto_cleanup_keep = '//*[@id="imageBox"]'
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
    """Clean one article page: drop empty ids, flatten photo-gallery
    pages into plain image+caption divs, and strip text anchors."""
    #delete empty id attributes--they screw up the TOC for unknown reasons
    divtags = soup.findAll('div',attrs={'id':''})
    if divtags:
        for div in divtags:
            del(div['id'])
    # A 'storyphoto' div with no 'storycontent' sibling indicates a
    # photo-gallery page rather than a normal article.
    pgall = soup.find('div',attrs={'id':'storyphoto'})
    if pgall is not None: # photo gallery perhaps
        if (soup.find('div',attrs={'id':'storycontent'}) is None):
            # Rebuild the gallery as a simple div of <img> + caption pairs.
            allpics = Tag(soup,'div')
            # NOTE(review): the lead 'storyimage' is dropped — presumably it
            # duplicates one of the related thumbnails; confirm.
            first_img = pgall.find('div','storyimage')
            if first_img is not None:
                first_img.extract()
            tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
            if tlist is not None:
                for atag in tlist.findAll('a'):
                    img = Tag(soup,'img')
                    # Drop the query string from the thumbnail src.
                    srcpre, sep, srcpost = atag.img['src'].partition('?')
                    img['src'] = srcpre
                    # The thumbnail's alt text becomes the caption.
                    pdesc = Tag(soup,'p')
                    pdesc.insert(0,atag.img['alt'])
                    pdesc['class']='photocaption'
                    div = Tag(soup,'div')
                    div.insert(0,pdesc)
                    div.insert(0,img)
                    allpics.append(div)
            pgall.replaceWith(allpics)
    # Remove any remaining inline photo blocks from regular articles.
    for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
        pg.extract()
    return self.strip_anchors(soup)
# RSS section feeds.  NOTE(review): a parse_index() is defined below,
# which normally takes precedence over 'feeds' in BasicNewsRecipe —
# confirm whether these are still used.
feeds = [
    ('News',
     'http://rss.canada.com/get/?F297'),
    ('Sports',
     'http://rss.canada.com/get/?F299'),
    ('Entertainment',
     'http://rss.canada.com/get/?F7366'),
    ('Business',
     'http://rss.canada.com/get/?F6939'),
]
def parse_index(self):
    """Build the recipe index by scraping each section page listed in
    self.postmedia_index_pages.

    Returns the list of (section_title, [article_dict, ...]) tuples
    that BasicNewsRecipe expects.  Sections whose index page cannot be
    fetched, or which yield no articles, are silently omitted.
    """
    articles = {}
    ans = []

    def handle_article(adiv, key):
        # Extract one article link from a feature div and file it under
        # the given section key.
        h1tag = adiv.h1
        if h1tag is None:
            return
        atag = h1tag.a
        if atag is None:
            return
        # Absolute (off-site) links are skipped entirely.
        if atag['href'].startswith('http'):
            return
        elif atag['href'].startswith('/'):
            url = self.url_prefix + atag['href']
        else:
            url = self.url_prefix + '/' + atag['href']
        # De-duplicate articles that appear in several sections.
        if url in self.url_list:
            return
        self.url_list.append(url)
        title = self.tag_to_string(atag, False)
        # Skip multimedia-only pages.
        if 'VIDEO' in title.upper():
            return
        if 'GALLERY' in title.upper():
            return
        if 'PHOTOS' in title.upper():
            return
        dtag = adiv.find('div', 'content')
        description = ''
        print("URL " + url)
        print("TITLE " + title)
        if dtag is not None:
            stag = dtag.span
            if stag is not None:
                # The span is either a timestamp (ignored) or the blurb.
                if stag['class'] != 'timestamp':
                    description = self.tag_to_string(stag, False)
            else:
                description = self.tag_to_string(dtag, False)
        print("DESCRIPTION: " + description)
        # Py2-only dict.has_key() replaced with the 'in' operator.
        if key not in articles:
            articles[key] = []
        articles[key].append(dict(title=title, url=url, date='',
                                  description=description, author='', content=''))

    def parse_web_index(key, keyurl):
        # Fetch one section index page; a failed fetch just drops the
        # section (deliberate best-effort behaviour).
        try:
            soup = self.index_to_soup(self.url_prefix + keyurl)
        except:
            return
        ans.append(key)
        mainsoup = soup.find('div', 'bodywrapper')
        footer = mainsoup.find(attrs={'id': 'footerfeature'})
        if footer is not None:
            footer.extract()
        print("Section: " + key)
        for wdiv in mainsoup.findAll('div', attrs={'id': re.compile('^HorizontalFeatureSlider_1_Story')}):
            handle_article(wdiv, key)
            wdiv.extract()
        for wdiv in mainsoup.findAll(attrs={'id': ['featurewidget', 'textfeature', 'textlinks_timestamp']}):
            for adiv in wdiv.findAll('div', 'featurecontent'):
                handle_article(adiv, key)

    for (k, url) in self.postmedia_index_pages:
        parse_web_index(k, url)
    # Preserve section order; drop sections that produced no articles.
    return [(key, articles[key]) for key in ans if key in articles]

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import re
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
# un-comment the following six lines for the Ottawa Citizen
title = u'Ottawa Citizen'
url_prefix = 'http://www.ottawacitizen.com'
description = u'News from Ottawa, ON'
std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
logo_url = 'oclogo.jpg'
fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,136 +1,320 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following three lines for the Vancouver Province
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following six lines for the Vancouver Province
title = u'Vancouver Province'
url_prefix = 'http://www.theprovince.com'
description = u'News from Vancouver, BC'
std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
logo_url = 'vplogo.jpg'
fp_tag = 'CAN_TP'
# un-comment the following three lines for the Vancouver Sun
#title = u'Vancouver Sun'
#url_prefix = 'http://www.vancouversun.com'
#description = u'News from Vancouver, BC'
# un-comment the following six lines for the Vancouver Sun
## title = u'Vancouver Sun'
## url_prefix = 'http://www.vancouversun.com'
## description = u'News from Vancouver, BC'
## std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
## logo_url = 'vslogo.jpg'
## fp_tag = 'CAN_VS'
# un-comment the following three lines for the Edmonton Journal
#title = u'Edmonton Journal'
#url_prefix = 'http://www.edmontonjournal.com'
#description = u'News from Edmonton, AB'
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following three lines for the Calgary Herald
#title = u'Calgary Herald'
#url_prefix = 'http://www.calgaryherald.com'
#description = u'News from Calgary, AB'
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following three lines for the Regina Leader-Post
#title = u'Regina Leader-Post'
#url_prefix = 'http://www.leaderpost.com'
#description = u'News from Regina, SK'
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following three lines for the Saskatoon Star-Phoenix
#title = u'Saskatoon Star-Phoenix'
#url_prefix = 'http://www.thestarphoenix.com'
#description = u'News from Saskatoon, SK'
# un-comment the following three lines for the Windsor Star
#title = u'Windsor Star'
#url_prefix = 'http://www.windsorstar.com'
#description = u'News from Windsor, ON'
# un-comment the following three lines for the Ottawa Citizen
#title = u'Ottawa Citizen'
#url_prefix = 'http://www.ottawacitizen.com'
#description = u'News from Ottawa, ON'
# un-comment the following three lines for the Montreal Gazette
#title = u'Montreal Gazette'
#url_prefix = 'http://www.montrealgazette.com'
#description = u'News from Montreal, QC'
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
try:
br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
try:
br.open(cover)
except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable")
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
# Replace rsquo (\x92)
fixed = re.sub("\x92","",fixed)
# Replace ldquo (\x93)
fixed = re.sub("\x93","“",fixed)
# Replace rdquo (\x94)
fixed = re.sub("\x94","”",fixed)
# Replace ndash (\x96)
fixed = re.sub("\x96","",fixed)
# Replace mdash (\x97)
fixed = re.sub("\x97","—",fixed)
fixed = re.sub("&#x2019;","",fixed)
return fixed
def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&'
massaged = re.sub("&","&", massaged)
return self.fixChars(massaged)
else:
return description
def populate_article_metadata(self, article, soup, first):
if first:
picdiv = soup.find('body').find('img')
if picdiv is not None:
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
xtitle = article.text_summary.strip()
if len(xtitle) == 0:
desc = soup.find('meta',attrs={'property':'og:description'})
if desc is not None:
article.summary = article.text_summary = desc['content']
def strip_anchors(self,soup):
paras = soup.findAll(True)
for para in paras:
aTags = para.findAll('a')
for a in aTags:
if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete iempty id attributes--they screw up the TOC for unknow reasons
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
return soup
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,105 +1,141 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
'''
www.canada.com
'''
import re
import string, re
from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
class CanWestPaper(BasicNewsRecipe):
# un-comment the following four lines for the Victoria Times Colonist
## title = u'Victoria Times Colonist'
## url_prefix = 'http://www.timescolonist.com'
## description = u'News from Victoria, BC'
## fp_tag = 'CAN_TC'
postmedia_index_pages = [
(u'Headlines',u'/index.html'),
(u'Ottawa & Area',u'/news/ottawa/index.html'),
(u'Vancouver',u'/news/vancouver/index.html'),
(u'Calgary',u'/news/calgary/index.html'),
(u'Edmonton',u'/news/edmonton/index.html'),
(u'Montreal',u'/news/montreal/index.html'),
(u'Fraser Valley',u'/news/fraser-valley/index.html'),
(u'British Columbia',u'/news/bc/index.html'),
(u'Alberta',u'/news/alberta/index.html'),
(u'Canada',u'/news/canada/index.html'),
(u'National',u'/news/national/index.html'),
(u'Politics',u'/news/politics/index.html'),
(u'Insight',u'/news/insight/index.html'),
(u'Special Reports',u'/news/specialreports/index.html'),
(u'Gangs',u'/news/gangs/index.html'),
(u'Education',u'/news/education/index.html'),
(u'Health',u'/news/health/index.html'),
(u'Environment',u'/news/environment/index.html'),
(u'World',u'/news/world/index.html'),
(u'Police Blotter',u'/news/crime-and-justice/index.html'),
(u'Crime',u'/news/blotter/index.html'),
(u'Around Town',u'/news/topic.html?t=keyword&q=Around+Town'),
(u'Diplomatica',u'/news/diplomatica/index.html'),
(u'Opinion',u'/opinion/index.html'),
(u'Columnists',u'/columnists/index.html'),
(u'Editorials',u'/opinion/editorials/index.html'),
(u'Letters',u'/opinion/letters/index.html'),
(u'Business',u'/business/index.html'),
(u'Sports',u'/sports/index.html'),
(u'Arts',u'/entertainment/index.html'),
(u'Life',u'/life/index.html'),
(u'Technology',u'/technology/index.html'),
(u'Travel',u'/travel/index.html'),
(u'Health',u'/health/index.html')
]
# un-comment the following four lines for the Vancouver Province
# un-comment the following six lines for the Vancouver Province
## title = u'Vancouver Province'
## url_prefix = 'http://www.theprovince.com'
## description = u'News from Vancouver, BC'
## fp_tag = 'CAN_VP'
## std_logo_url = 'http://www.theprovince.com/images/logo_theprovince.jpg'
## logo_url = 'vplogo.jpg'
## fp_tag = 'CAN_TP'
# un-comment the following four lines for the Vancouver Sun
# un-comment the following six lines for the Vancouver Sun
title = u'Vancouver Sun'
url_prefix = 'http://www.vancouversun.com'
description = u'News from Vancouver, BC'
std_logo_url = 'http://www.vancouversun.com/images/logo_vancouversun.jpg'
logo_url = 'vslogo.jpg'
fp_tag = 'CAN_VS'
# un-comment the following four lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Calgary Herald
# un-comment the following six lines for the Calgary Herald
## title = u'Calgary Herald'
## url_prefix = 'http://www.calgaryherald.com'
## description = u'News from Calgary, AB'
## std_logo_url = 'http://www.calgaryherald.com/images/logo_calgaryherald.jpg'
## logo_url = 'chlogo.jpg'
## fp_tag = 'CAN_CH'
# un-comment the following four lines for the Regina Leader-Post
## title = u'Regina Leader-Post'
## url_prefix = 'http://www.leaderpost.com'
## description = u'News from Regina, SK'
## fp_tag = ''
# un-comment the following six lines for the Edmonton Journal
## title = u'Edmonton Journal'
## url_prefix = 'http://www.edmontonjournal.com'
## description = u'News from Edmonton, AB'
## std_logo_url = 'http://www.edmontonjournal.com/images/logo_edmontonjournal.jpg'
## logo_url = 'ejlogo.jpg'
## fp_tag = 'CAN_EJ'
# un-comment the following four lines for the Saskatoon Star-Phoenix
## title = u'Saskatoon Star-Phoenix'
## url_prefix = 'http://www.thestarphoenix.com'
## description = u'News from Saskatoon, SK'
## fp_tag = ''
# un-comment the following four lines for the Windsor Star
## title = u'Windsor Star'
## url_prefix = 'http://www.windsorstar.com'
## description = u'News from Windsor, ON'
## fp_tag = 'CAN_'
# un-comment the following four lines for the Ottawa Citizen
# un-comment the following six lines for the Ottawa Citizen
## title = u'Ottawa Citizen'
## url_prefix = 'http://www.ottawacitizen.com'
## description = u'News from Ottawa, ON'
## std_logo_url = 'http://www.ottawacitizen.com/images/logo_ottawacitizen.jpg'
## logo_url = 'oclogo.jpg'
## fp_tag = 'CAN_OC'
# un-comment the following four lines for the Montreal Gazette
# un-comment the following six lines for the Montreal Gazette
## title = u'Montreal Gazette'
## url_prefix = 'http://www.montrealgazette.com'
## description = u'News from Montreal, QC'
## std_logo_url = 'http://www.montrealgazette.com/images/logo_montrealgazette.jpg'
## logo_url = 'mglogo.jpg'
## fp_tag = 'CAN_MG'
Kindle_Fire=False
masthead_url = std_logo_url
url_list = []
language = 'en_CA'
__author__ = 'Nick Redding'
no_stylesheets = True
timefmt = ' [%b %d]'
encoding = 'utf-8'
extra_css = '''
.timestamp { font-size:xx-small; display: block; }
#storyheader { font-size: medium; }
#storyheader h1 { font-size: x-large; }
#storyheader h2 { font-size: large; font-style: italic; }
#storyheader h2 { font-size: small; font-style: italic; }
.byline { font-size:xx-small; }
#photocaption { font-size: small; font-style: italic }
#photocredit { font-size: xx-small; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
#photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
.photocaption { font-size: xx-small; font-style: italic; font-weight: normal; }
#photocredit { font-size: xx-small; font-weight: normal; }'''
keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'}),dict(name='div', attrs={'id':'storyphoto'})]
remove_tags = [{'class':'comments'},
dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
dict(name='h2', attrs={'id':'photocredit'}),
dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
dict(name='div', attrs={'class':'rule_grey_solid'}),
dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
def get_cover_url(self):
from datetime import timedelta, date
if self.fp_tag=='':
return None
from datetime import timedelta, datetime, date
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.fp_tag+'.jpg'
br = BasicNewsRecipe.get_browser()
daysback=1
@ -120,6 +156,19 @@ class CanWestPaper(BasicNewsRecipe):
cover = None
return cover
def prepare_masthead_image(self, path_to_image, out_path):
if self.Kindle_Fire:
from calibre import fit_image
from calibre.utils.magick import Image, create_canvas
img = Image()
img.open(path_to_image)
width, height = img.size
img2 = create_canvas(width, height)
img2.compose(img)
img2.save(out_path)
else:
BasicNewsRecipe.prepare_masthead_image(self, path_to_image, out_path)
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -166,55 +215,106 @@ class CanWestPaper(BasicNewsRecipe):
a.replaceWith(a.renderContents().decode('cp1252','replace'))
return soup
def preprocess_html(self,soup):
#delete empty id attributes--they screw up the TOC for unknown reasons
divtags = soup.findAll('div',attrs={'id':''})
if divtags:
for div in divtags:
del(div['id'])
pgall = soup.find('div',attrs={'id':'storyphoto'})
if pgall is not None: # photo gallery perhaps
if (soup.find('div',attrs={'id':'storycontent'}) is None):
allpics = Tag(soup,'div')
first_img = pgall.find('div','storyimage')
if first_img is not None:
first_img.extract()
tlist = pgall.find('div',attrs={'id':'relatedthumbs'})
if tlist is not None:
for atag in tlist.findAll('a'):
img = Tag(soup,'img')
srcpre, sep, srcpost = atag.img['src'].partition('?')
img['src'] = srcpre
pdesc = Tag(soup,'p')
pdesc.insert(0,atag.img['alt'])
pdesc['class']='photocaption'
div = Tag(soup,'div')
div.insert(0,pdesc)
div.insert(0,img)
allpics.append(div)
pgall.replaceWith(allpics)
for pg in soup.findAll('div',attrs={'id':'storyphoto'}):
pg.extract()
return self.strip_anchors(soup)
def parse_index(self):
soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')
articles = {}
key = 'News'
ans = ['News']
ans = []
# Find each instance of class="sectiontitle", class="featurecontent"
for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
#self.log(" div class = %s" % divtag['class'])
if divtag['class'].startswith('section_title'):
# div contains section title
if not divtag.h3:
continue
key = self.tag_to_string(divtag.h3,False)
ans.append(key)
self.log("Section name %s" % key)
continue
# div contains article data
h1tag = divtag.find('h1')
if not h1tag:
continue
atag = h1tag.find('a',href=True)
if not atag:
continue
url = self.url_prefix+'/news/todays-paper/'+atag['href']
#self.log("Section %s" % key)
#self.log("url %s" % url)
def handle_article(adiv,key):
h1tag = adiv.h1
if h1tag is not None:
atag = h1tag.a
if atag is not None:
url = atag['href']
if atag['href'].startswith('http'):
return
elif atag['href'].startswith('/'):
url = self.url_prefix+atag['href']
else:
url = self.url_prefix+'/'+atag['href']
if url in self.url_list:
return
self.url_list.append(url)
title = self.tag_to_string(atag,False)
#self.log("title %s" % title)
pubdate = ''
if 'VIDEO' in title.upper():
return
if 'GALLERY' in title.upper():
return
if 'PHOTOS' in title.upper():
return
dtag = adiv.find('div','content')
description=''
ptag = divtag.find('p');
if ptag:
description = self.tag_to_string(ptag,False)
#self.log("description %s" % description)
author = ''
autag = divtag.find('h4')
if autag:
author = self.tag_to_string(autag,False)
#self.log("author %s" % author)
print("URL "+url)
print("TITLE "+title)
if dtag is not None:
stag = dtag.span
if stag is not None:
if stag['class'] != 'timestamp':
description = self.tag_to_string(stag,False)
else:
description = self.tag_to_string(dtag,False)
print("DESCRIPTION: "+description)
if not articles.has_key(key):
articles[key] = []
articles[key].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
articles[key].append(dict(title=title,url=url,date='',description=description,author='',content=''))
def parse_web_index(key, keyurl):
try:
soup = self.index_to_soup(self.url_prefix+keyurl)
except:
return
ans.append(key)
mainsoup = soup.find('div','bodywrapper')
footer = mainsoup.find(attrs={'id':'footerfeature'})
if footer is not None:
footer.extract()
print("Section: "+key)
for wdiv in mainsoup.findAll('div',attrs={'id':re.compile('^HorizontalFeatureSlider_1_Story')}):
handle_article(wdiv,key)
wdiv.extract()
for wdiv in mainsoup.findAll(attrs={'id':['featurewidget','textfeature','textlinks_timestamp']}):
for adiv in wdiv.findAll('div','featurecontent'):
handle_article(adiv,key)
for (k,url) in self.postmedia_index_pages:
parse_web_index(k,url)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans

View File

@ -1,35 +1,229 @@
body { background-color: white; }
/*
** The following rules apply principally to the line items shown in the
** Authors, Titles, Genres, Series, and Recently Added sections. Rules for the
** Descriptions section are grouped together later in the file.
** ------------------------------------------------------------------------
*/
/*
** <div> grouping an author's works together
** Used in Sections:
** Authors
**
** Minimize widows and orphans by logically grouping chunks
** Some reports of problems with Sony (ADE) ereaders
** ADE: page-break-inside:avoid;
** iBooks: display:inline-block;
** width:100%;
*/
div.author_logical_group {
page-break-inside:avoid;
}
/*
** Force page break when starting new initial letter
** Used in Sections:
** Authors
** Titles
*/
div.initial_letter {
page-break-before:always;
}
/*
** Author name
** Used in Sections:
** Authors
** Genres
** Recently Added
*/
p.author_index {
clear:both;
font-size:large;
font-weight:bold;
text-align:left;
margin-top:0.25px;
margin-bottom:-2px;
text-indent: 0em;
}
/*
** Index letter
** Used in Sections:
** Authors
** Titles
*/
p.author_title_letter_index {
clear:both;
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:0px;
margin-bottom:0px;
}
/*
** Index letter
** Used in Sections:
** Series
*/
p.series_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
/*
** Month-Year
** Used in Sections:
** Recently Added
*/
p.date_index {
clear:both;
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.date_read {
clear:both;
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:6em;
text-indent:-6em;
}
/*
** Series name
** Used in Sections:
** Authors
** Series
** Genres
** Recently Added
** Optimized for ePub
*/
p.series {
clear:both;
font-style:italic;
margin-top:0.10em;
margin-bottom:0em;
margin-left:1.5em;
text-align:left;
text-indent:-1.25em;
}
/*
** Series name
** Used in Sections:
** Authors
** Series
** Genres
** Recently Added
** Optimized for mobi
*/
p.series_mobi {
clear:both;
font-style:italic;
margin-top:0em;
margin-bottom:0em;
margin-left:0em;
text-align:left;
text-indent:-30px;
}
/*
** Section title
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
** Descriptions
*/
p.title {
margin-top:0em;
margin-bottom:0em;
text-align:center;
font-style:italic;
font-size:xx-large;
}
/*
** Line item book listing
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
p.line_item {
clear: both;
font-family:monospace;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
/*
** Prefix
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
span.prefix {
float:left;
margin-left: 0.25em;
text-align: left;
vertical-align: middle;
width: 1.5em;
}
/*
** Book details entry
** Used in Sections:
** Authors
** Titles
** Series
** Genres
** Recently Added
*/
span.entry {
font-family: serif;
vertical-align:middle;
}
/*
** The following rules apply to Descriptions
** -----------------------------------------
*/
/*
** Link to Series
*/
a.series_id {
font-style:normal;
font-size:large;
}
/*
* Minimize widows and orphans by logically grouping chunks
* Some reports of problems with Sony (ADE) ereaders
* ADE: page-break-inside:avoid;
* iBooks: display:inline-block;
* width:100%;
** Various dividers
*/
div.author_logical_group {
page-break-inside:avoid;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
div.initial_letter {
page-break-before:always;
}
hr.annotations_divider {
width:50%;
margin-left:1em;
@ -63,47 +257,21 @@ hr.merged_comments_divider {
border-left: solid white 0px;
}
p.date_read {
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:6em;
text-indent:-6em;
}
/*
** Author name
*/
p.author {
clear:both;
font-size:large;
margin-top:0em;
margin-bottom:0em;
margin-bottom:0.1em;
text-align: center;
text-indent: 0em;
}
p.author_index {
font-size:large;
font-weight:bold;
text-align:left;
margin-top:0.25px;
margin-bottom:-2px;
text-indent: 0em;
}
p.author_title_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:0px;
margin-bottom:0px;
}
p.date_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
/*
** Formats
*/
p.formats {
font-size:90%;
margin-top:0em;
@ -112,6 +280,9 @@ p.formats {
text-indent: 0.0in;
}
/*
** Genres
*/
p.genres {
font-style:normal;
margin-top:0.5em;
@ -120,68 +291,55 @@ p.genres {
text-indent: 0.0in;
}
p.series {
font-style:italic;
margin-top:0.25em;
margin-bottom:0em;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
/*
** Series name
*/
p.series_id {
margin-top:0em;
margin-bottom:0em;
text-align:center;
}
p.series_letter_index {
font-size:x-large;
text-align:center;
font-weight:bold;
margin-top:1em;
margin-bottom:0px;
}
p.title {
margin-top:0em;
margin-bottom:0em;
text-align:center;
font-style:italic;
font-size:xx-large;
}
p.wishlist_item, p.unread_book, p.read_book, p.line_item {
font-family:monospace;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-align:left;
text-indent:-2em;
}
span.prefix {}
span.entry {
font-family: serif;
}
/*
* Book Descriptions
** Publisher, Publication Date
*/
td.publisher, td.date {
font-weight:bold;
text-align:center;
}
/*
** Rating
*/
td.rating{
text-align:center;
}
/*
** Additional notes
*/
td.notes {
font-size: 100%;
text-align:center;
}
/*
** Thumbnail
*/
td.thumbnail img {
-webkit-box-shadow: 4px 4px 12px #999;
}
/*
** Comments
*/
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}

View File

@ -172,6 +172,7 @@ if iswindows:
[
'calibre/devices/mtp/windows/utils.cpp',
'calibre/devices/mtp/windows/device_enumeration.cpp',
'calibre/devices/mtp/windows/device.cpp',
'calibre/devices/mtp/windows/wpd.cpp',
],
headers=[
@ -298,6 +299,7 @@ class Build(Command):
self.obj_dir = os.path.join(os.path.dirname(SRC), 'build', 'objects')
if not os.path.exists(self.obj_dir):
os.makedirs(self.obj_dir)
if not opts.only:
self.build_style(self.j(self.SRC, 'calibre', 'plugins'))
for ext in extensions:
if opts.only != 'all' and opts.only != ext.name:

View File

@ -38,7 +38,7 @@ binary_includes = [
'/lib/libz.so.1',
'/usr/lib/libtiff.so.5',
'/lib/libbz2.so.1',
'/usr/lib/libpoppler.so.25',
'/usr/lib/libpoppler.so.27',
'/usr/lib/libxml2.so.2',
'/usr/lib/libopenjpeg.so.2',
'/usr/lib/libxslt.so.1',

View File

@ -379,7 +379,7 @@ class Py2App(object):
@flush
def add_poppler(self):
info('\nAdding poppler')
for x in ('libpoppler.26.dylib',):
for x in ('libpoppler.27.dylib',):
self.install_dylib(os.path.join(SW, 'lib', x))
for x in ('pdftohtml', 'pdftoppm', 'pdfinfo'):
self.install_dylib(os.path.join(SW, 'bin', x), False)

View File

@ -28,7 +28,8 @@ isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat
isnetbsd = 'netbsd' in _plat
isbsd = isfreebsd or isnetbsd
isdragonflybsd = 'dragonfly' in _plat
isbsd = isfreebsd or isnetbsd or isdragonflybsd
islinux = not(iswindows or isosx or isbsd)
isfrozen = hasattr(sys, 'frozen')
isunix = isosx or islinux
@ -215,3 +216,13 @@ def get_windows_temp_path():
ans = buf.value
return ans if ans else None
def get_windows_user_locale_name():
import ctypes
k32 = ctypes.windll.kernel32
n = 200
buf = ctypes.create_unicode_buffer(u'\0'*n)
n = k32.GetUserDefaultLocaleName(buf, n)
if n == 0:
return None
return u'_'.join(buf.value.split(u'-')[:2])

View File

@ -1353,7 +1353,7 @@ class StoreEbookscomStore(StoreBase):
class StoreEbooksGratuitsStore(StoreBase):
name = 'EbooksGratuits.com'
description = u''
description = u'Ebooks Libres et Gratuits'
actual_plugin = 'calibre.gui2.store.stores.ebooksgratuits_plugin:EbooksGratuitsStore'
headquarters = 'FR'

View File

@ -87,7 +87,7 @@ class ANDROID(USBMS):
# Google
0x18d1 : {
0x0001 : [0x0223, 0x9999],
0x0001 : [0x0223, 0x230, 0x9999],
0x0003 : [0x0230],
0x4e11 : [0x0100, 0x226, 0x227],
0x4e12 : [0x0100, 0x226, 0x227],
@ -196,7 +196,7 @@ class ANDROID(USBMS):
'GENERIC-', 'ZTE', 'MID', 'QUALCOMM', 'PANDIGIT', 'HYSTON',
'VIZIO', 'GOOGLE', 'FREESCAL', 'KOBO_INC', 'LENOVO', 'ROCKCHIP',
'POCKET', 'ONDA_MID', 'ZENITHIN', 'INGENIC', 'PMID701C', 'PD',
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI']
'PMP5097C', 'MASS', 'NOVO7', 'ZEKI', 'COBY']
WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID',
@ -214,7 +214,8 @@ class ANDROID(USBMS):
'KTABLET_PC', 'INGENIC', 'GT-I9001_CARD', 'USB_2.0_DRIVER',
'GT-S5830L_CARD', 'UNIVERSE', 'XT875', 'PRO', '.KOBO_VOX',
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID']
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
'S5830I_CARD', 'MID7042']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -224,7 +225,7 @@ class ANDROID(USBMS):
'USB_2.0_DRIVER', 'I9100T', 'P999DW_SD_CARD', 'KTABLET_PC',
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0_DRIVER', 'XT875',
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID']
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042']
OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -92,6 +92,7 @@ class ControlError(ProtocolError):
def __init__(self, query=None, response=None, desc=None):
self.query = query
self.response = response
self.desc = desc
ProtocolError.__init__(self, desc)
def __str__(self):

View File

@ -39,6 +39,7 @@ class MTPDeviceBase(DevicePlugin):
def __init__(self, *args, **kwargs):
DevicePlugin.__init__(self, *args, **kwargs)
self.progress_reporter = None
self.current_friendly_name = None
def reset(self, key='-1', log_packets=False, report_progress=None,
detected_device=None):
@ -47,3 +48,7 @@ class MTPDeviceBase(DevicePlugin):
def set_progress_reporter(self, report_progress):
self.progress_reporter = report_progress
def get_gui_name(self):
return self.current_friendly_name or self.name

View File

@ -14,7 +14,7 @@ from collections import deque, OrderedDict
from io import BytesIO
from calibre import prints
from calibre.devices.errors import OpenFailed
from calibre.devices.errors import OpenFailed, DeviceError
from calibre.devices.mtp.base import MTPDeviceBase, synchronous
from calibre.devices.mtp.unix.detect import MTPDetect
@ -102,11 +102,6 @@ class MTP_DEVICE(MTPDeviceBase):
if self.progress_reporter is not None:
self.progress_reporter(p)
@synchronous
def get_gui_name(self):
if self.dev is None or not self.dev.friendly_name: return self.name
return self.dev.friendly_name
@synchronous
def is_usb_connected(self, devices_on_system, debug=False,
only_presence=False):
@ -134,7 +129,7 @@ class MTP_DEVICE(MTPDeviceBase):
@synchronous
def post_yank_cleanup(self):
self.dev = self.filesystem_cache = None
self.dev = self.filesystem_cache = self.current_friendly_name = None
@synchronous
def startup(self):
@ -184,15 +179,18 @@ class MTP_DEVICE(MTPDeviceBase):
self._carda_id = storage[1]['id']
if len(storage) > 2:
self._cardb_id = storage[2]['id']
self.current_friendly_name = self.dev.name
@synchronous
def read_filesystem_cache(self):
try:
files, errs = self.dev.get_filelist(self)
if errs and not files:
raise OpenFailed('Failed to read files from device. Underlying errors:\n'
raise DeviceError('Failed to read files from device. Underlying errors:\n'
+self.format_errorstack(errs))
folders, errs = self.dev.get_folderlist()
if errs and not folders:
raise OpenFailed('Failed to read folders from device. Underlying errors:\n'
raise DeviceError('Failed to read folders from device. Underlying errors:\n'
+self.format_errorstack(errs))
self.filesystem_cache = FilesystemCache(files, folders)
except:
@ -202,15 +200,15 @@ class MTP_DEVICE(MTPDeviceBase):
@synchronous
def get_device_information(self, end_session=True):
d = self.dev
return (d.friendly_name, d.device_version, d.device_version, '')
return (self.current_friendly_name, d.device_version, d.device_version, '')
@synchronous
def card_prefix(self, end_session=True):
ans = [None, None]
if self._carda_id is not None:
ans[0] = 'mtp:%d:'%self._carda_id
ans[0] = 'mtp:::%d:::'%self._carda_id
if self._cardb_id is not None:
ans[1] = 'mtp:%d:'%self._cardb_id
ans[1] = 'mtp:::%d:::'%self._cardb_id
return tuple(ans)
@synchronous
@ -248,6 +246,7 @@ if __name__ == '__main__':
devs = linux_scanner()
mtp_devs = dev.detect(devs)
dev.open(list(mtp_devs)[0], 'xxx')
dev.read_filesystem_cache()
d = dev.dev
print ("Opened device:", dev.get_gui_name())
print ("Storage info:")

View File

@ -1,3 +1,11 @@
/*
* libmtp.c
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE
#include <Python.h>

View File

@ -0,0 +1,137 @@
/*
* device.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
extern IPortableDevice* wpd::open_device(const wchar_t *pnp_id, IPortableDeviceValues *client_information);
extern IPortableDeviceValues* wpd::get_client_information();
extern PyObject* wpd::get_device_information(IPortableDevice *device);
using namespace wpd;
// Device.__init__() {{{
static void
dealloc(Device* self)
{
if (self->pnp_id != NULL) free(self->pnp_id);
self->pnp_id = NULL;
if (self->device != NULL) {
Py_BEGIN_ALLOW_THREADS;
self->device->Close(); self->device->Release();
self->device = NULL;
Py_END_ALLOW_THREADS;
}
if (self->client_information != NULL) { self->client_information->Release(); self->client_information = NULL; }
Py_XDECREF(self->device_information); self->device_information = NULL;
self->ob_type->tp_free((PyObject*)self);
}
static int
init(Device *self, PyObject *args, PyObject *kwds)
{
PyObject *pnp_id;
int ret = -1;
if (!PyArg_ParseTuple(args, "O", &pnp_id)) return -1;
self->pnp_id = unicode_to_wchar(pnp_id);
if (self->pnp_id == NULL) return -1;
self->client_information = get_client_information();
if (self->client_information != NULL) {
self->device = open_device(self->pnp_id, self->client_information);
if (self->device != NULL) {
self->device_information = get_device_information(self->device);
if (self->device_information != NULL) ret = 0;
}
}
return ret;
}
// }}}
// update_device_data() {{{
static PyObject*
update_data(Device *self, PyObject *args, PyObject *kwargs) {
PyObject *di = NULL;
di = get_device_information(self->device);
if (di == NULL) return NULL;
Py_XDECREF(self->device_information); self->device_information = di;
Py_RETURN_NONE;
} // }}}
static PyMethodDef Device_methods[] = {
{"update_data", (PyCFunction)update_data, METH_VARARGS,
"update_data() -> Reread the basic device data from the device (total, space, free space, storage locations, etc.)"
},
{NULL}
};
// Device.data {{{
static PyObject *
Device_data(Device *self, void *closure) {
Py_INCREF(self->device_information); return self->device_information;
} // }}}
static PyGetSetDef Device_getsetters[] = {
{(char *)"data",
(getter)Device_data, NULL,
(char *)"The basic device information.",
NULL},
{NULL} /* Sentinel */
};
PyTypeObject wpd::DeviceType = { // {{{
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"wpd.Device", /*tp_name*/
sizeof(Device), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)dealloc, /*tp_dealloc*/
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
"Device", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Device_methods, /* tp_methods */
0, /* tp_members */
Device_getsetters, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)init, /* tp_init */
0, /* tp_alloc */
0, /* tp_new */
}; // }}}

View File

@ -2,7 +2,7 @@
* device_enumeration.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
@ -72,15 +72,131 @@ IPortableDevice *open_device(const wchar_t *pnp_id, IPortableDeviceValues *clien
} // }}}
PyObject* get_storage_info(IPortableDevice *device) { // {{{
HRESULT hr, hr2;
IPortableDeviceContent *content = NULL;
IEnumPortableDeviceObjectIDs *objects = NULL;
IPortableDeviceProperties *properties = NULL;
IPortableDeviceKeyCollection *storage_properties = NULL;
IPortableDeviceValues *values = NULL;
PyObject *ans = NULL, *storage = NULL, *so = NULL, *desc = NULL, *soid = NULL;
DWORD fetched, i;
PWSTR object_ids[10];
GUID guid;
ULONGLONG capacity, free_space, capacity_objects, free_objects;
ULONG access;
LPWSTR storage_desc = NULL;
storage = PyList_New(0);
if (storage == NULL) { PyErr_NoMemory(); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = device->Content(&content);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get content interface from device", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = content->Properties(&properties);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get properties interface", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = CoCreateInstance(CLSID_PortableDeviceKeyCollection, NULL,
CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&storage_properties));
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to create storage properties collection", hr); goto end;}
Py_BEGIN_ALLOW_THREADS;
hr = storage_properties->Add(WPD_OBJECT_CONTENT_TYPE);
hr = storage_properties->Add(WPD_FUNCTIONAL_OBJECT_CATEGORY);
hr = storage_properties->Add(WPD_STORAGE_DESCRIPTION);
hr = storage_properties->Add(WPD_STORAGE_CAPACITY);
hr = storage_properties->Add(WPD_STORAGE_CAPACITY_IN_OBJECTS);
hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_BYTES);
hr = storage_properties->Add(WPD_STORAGE_FREE_SPACE_IN_OBJECTS);
hr = storage_properties->Add(WPD_STORAGE_ACCESS_CAPABILITY);
hr = storage_properties->Add(WPD_STORAGE_FILE_SYSTEM_TYPE);
hr = storage_properties->Add(WPD_OBJECT_NAME);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to create collection of properties for storage query", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = content->EnumObjects(0, WPD_DEVICE_OBJECT_ID, NULL, &objects);
Py_END_ALLOW_THREADS;
if (FAILED(hr)) {hresult_set_exc("Failed to get objects from device", hr); goto end;}
hr = S_OK;
while (hr == S_OK) {
Py_BEGIN_ALLOW_THREADS;
hr = objects->Next(10, object_ids, &fetched);
Py_END_ALLOW_THREADS;
if (SUCCEEDED(hr)) {
for(i = 0; i < fetched; i++) {
Py_BEGIN_ALLOW_THREADS;
hr2 = properties->GetValues(object_ids[i], storage_properties, &values);
Py_END_ALLOW_THREADS;
if SUCCEEDED(hr2) {
if (
SUCCEEDED(values->GetGuidValue(WPD_OBJECT_CONTENT_TYPE, &guid)) && IsEqualGUID(guid, WPD_CONTENT_TYPE_FUNCTIONAL_OBJECT) &&
SUCCEEDED(values->GetGuidValue(WPD_FUNCTIONAL_OBJECT_CATEGORY, &guid)) && IsEqualGUID(guid, WPD_FUNCTIONAL_CATEGORY_STORAGE)
) {
capacity = 0; capacity_objects = 0; free_space = 0; free_objects = 0;
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY, &capacity);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_CAPACITY_IN_OBJECTS, &capacity_objects);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_BYTES, &free_space);
values->GetUnsignedLargeIntegerValue(WPD_STORAGE_FREE_SPACE_IN_OBJECTS, &free_objects);
desc = Py_False;
if (SUCCEEDED(values->GetUnsignedIntegerValue(WPD_STORAGE_ACCESS_CAPABILITY, &access)) && access == WPD_STORAGE_ACCESS_CAPABILITY_READWRITE) desc = Py_True;
soid = PyUnicode_FromWideChar(object_ids[i], wcslen(object_ids[i]));
if (soid == NULL) { PyErr_NoMemory(); goto end; }
so = Py_BuildValue("{s:K,s:K,s:K,s:K,s:O,s:N}",
"capacity", capacity, "capacity_objects", capacity_objects, "free_space", free_space, "free_objects", free_objects, "rw", desc, "id", soid);
if (so == NULL) { PyErr_NoMemory(); goto end; }
if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_DESCRIPTION, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "description", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
if (SUCCEEDED(values->GetStringValue(WPD_OBJECT_NAME, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "name", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
if (SUCCEEDED(values->GetStringValue(WPD_STORAGE_FILE_SYSTEM_TYPE, &storage_desc))) {
desc = PyUnicode_FromWideChar(storage_desc, wcslen(storage_desc));
if (desc != NULL) { PyDict_SetItemString(so, "filesystem", desc); Py_DECREF(desc);}
CoTaskMemFree(storage_desc); storage_desc = NULL;
}
PyList_Append(storage, so);
Py_DECREF(so);
}
}
}
}
}
ans = storage;
end:
if (content != NULL) content->Release();
if (objects != NULL) objects->Release();
if (properties != NULL) properties->Release();
if (storage_properties != NULL) storage_properties->Release();
if (values != NULL) values->Release();
return ans;
} // }}}
PyObject* get_device_information(IPortableDevice *device) { // {{{
IPortableDeviceContent *content = NULL;
IPortableDeviceProperties *properties = NULL;
IPortableDeviceKeyCollection *keys = NULL;
IPortableDeviceValues *values = NULL;
IPortableDeviceCapabilities *capabilities = NULL;
IPortableDevicePropVariantCollection *categories = NULL;
HRESULT hr;
DWORD num_of_categories, i;
LPWSTR temp;
ULONG ti;
PyObject *t, *ans = NULL;
PyObject *t, *ans = NULL, *storage = NULL;
char *type;
Py_BEGIN_ALLOW_THREADS;
@ -117,6 +233,21 @@ PyObject* get_device_information(IPortableDevice *device) { // {{{
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device info", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = device->Capabilities(&capabilities);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device capabilities", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = capabilities->GetFunctionalCategories(&categories);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device functional categories", hr); goto end; }
Py_BEGIN_ALLOW_THREADS;
hr = categories->GetCount(&num_of_categories);
Py_END_ALLOW_THREADS;
if(FAILED(hr)) {hresult_set_exc("Failed to get device functional categories number", hr); goto end; }
ans = PyDict_New();
if (ans == NULL) {PyErr_NoMemory(); goto end;}
@ -184,11 +315,34 @@ PyObject* get_device_information(IPortableDevice *device) { // {{{
CoTaskMemFree(temp);
}
t = Py_False;
for (i = 0; i < num_of_categories; i++) {
PROPVARIANT pv;
PropVariantInit(&pv);
if (SUCCEEDED(categories->GetAt(i, &pv)) && pv.puuid != NULL) {
if (IsEqualGUID(WPD_FUNCTIONAL_CATEGORY_STORAGE, *pv.puuid)) {
t = Py_True;
}
}
PropVariantClear(&pv);
if (t == Py_True) break;
}
PyDict_SetItemString(ans, "has_storage", t);
if (t == Py_True) {
storage = get_storage_info(device);
if (storage == NULL) goto end;
PyDict_SetItemString(ans, "storage", storage);
}
end:
if (keys != NULL) keys->Release();
if (values != NULL) values->Release();
if (properties != NULL) properties->Release();
if (content != NULL) content->Release();
if (capabilities != NULL) capabilities->Release();
if (categories != NULL) categories->Release();
return ans;
} // }}}

View File

@ -0,0 +1,200 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import time
from threading import RLock
from calibre import as_unicode, prints
from calibre.constants import plugins, __appname__, numeric_version
from calibre.devices.errors import OpenFailed
from calibre.devices.mtp.base import MTPDeviceBase, synchronous
class MTP_DEVICE(MTPDeviceBase):
    # Windows driver for MTP devices, implemented on top of the native
    # 'wpd' (Windows Portable Devices) C extension plugin. All public
    # methods are serialized via @synchronous on self.lock.
    supported_platforms = ['windows']
    def __init__(self, *args, **kwargs):
        MTPDeviceBase.__init__(self, *args, **kwargs)
        self.dev = None
        self.lock = RLock()
        # PnP ids of devices that failed to open; never retried this session
        self.blacklisted_devices = set()
        # PnP ids the user ejected; ignored until physically re-plugged
        self.ejected_devices = set()
        self.currently_connected_pnp_id = None
        # Maps PnP id -> device info dict, or None (not yet queried),
        # or False (last query failed; retry on next poll)
        self.detected_devices = {}
        self.previous_devices_on_system = frozenset()
        self.last_refresh_devices_time = time.time()
        self.wpd = self.wpd_error = None
        # Storage object ids for main memory and the two card slots
        self._main_id = self._carda_id = self._cardb_id = None
    @synchronous
    def startup(self):
        # Load the wpd plugin and initialize the WPD COM machinery.
        # On failure, self.wpd_error holds a human readable message and
        # detect_managed_devices() becomes a no-op.
        self.wpd, self.wpd_error = plugins['wpd']
        if self.wpd is not None:
            try:
                self.wpd.init(__appname__, *(numeric_version[:3]))
            except self.wpd.NoWPD:
                self.wpd_error = _(
                    'The Windows Portable Devices service is not available'
                    ' on your computer. You may need to install Windows'
                    ' Media Player 11 or newer and/or restart your computer')
            except Exception as e:
                self.wpd_error = as_unicode(e)
    @synchronous
    def shutdown(self):
        # Drop any open device and uninitialize the WPD plugin.
        self.dev = self.filesystem_cache = None
        if self.wpd is not None:
            self.wpd.uninit()
    @synchronous
    def detect_managed_devices(self, devices_on_system):
        # Called repeatedly by the device scanner. Returns the PnP id of a
        # suitable MTP device to connect to, or None. Re-enumerates WPD
        # devices only when the system device set changed or at most every
        # 10 seconds, since enumeration is expensive.
        if self.wpd is None: return None
        devices_on_system = frozenset(devices_on_system)
        if (devices_on_system != self.previous_devices_on_system or time.time()
                - self.last_refresh_devices_time > 10):
            self.previous_devices_on_system = devices_on_system
            self.last_refresh_devices_time = time.time()
            try:
                pnp_ids = frozenset(self.wpd.enumerate_devices())
            except:
                return None
            # Keep cached info for still-present devices, forget the rest
            self.detected_devices = {dev:self.detected_devices.get(dev, None)
                    for dev in pnp_ids}
        # Get device data for detected devices. If there is an error, we will
        # try again for that device the next time this method is called.
        for dev in tuple(self.detected_devices.iterkeys()):
            data = self.detected_devices.get(dev, None)
            if data is None or data is False:
                try:
                    data = self.wpd.device_info(dev)
                except Exception as e:
                    prints('Failed to get device info for device:', dev,
                            as_unicode(e))
                    # First failure -> False (retry next poll); second
                    # consecutive failure -> {} (give up, treated as unsuitable)
                    data = {} if data is False else False
                self.detected_devices[dev] = data
        # Remove devices that have been disconnected from ejected
        # devices and blacklisted devices
        self.ejected_devices = set(self.detected_devices).intersection(
                self.ejected_devices)
        self.blacklisted_devices = set(self.detected_devices).intersection(
                self.blacklisted_devices)
        if self.currently_connected_pnp_id is not None:
            # Stay with the current device while it remains plugged in
            return (self.currently_connected_pnp_id if
                    self.currently_connected_pnp_id in self.detected_devices
                    else None)
        for dev, data in self.detected_devices.iteritems():
            if dev in self.blacklisted_devices or dev in self.ejected_devices:
                # Ignore blacklisted and ejected devices
                continue
            if data and self.is_suitable_wpd_device(data):
                return dev
        return None
    def is_suitable_wpd_device(self, devdata):
        # Return True if the device info dict describes an MTP device with
        # at least one read-write storage, i.e. one we can manage.
        # Check that protocol is MTP
        protocol = devdata.get('protocol', '').lower()
        if not protocol.startswith('mtp:'): return False
        # Check that the device has some read-write storage
        if not devdata.get('has_storage', False): return False
        has_rw_storage = False
        for s in devdata.get('storage', []):
            if s.get('rw', False):
                has_rw_storage = True
                break
        if not has_rw_storage: return False
        return True
    @synchronous
    def post_yank_cleanup(self):
        # The device was unplugged without an eject; reset all state.
        self.currently_connected_pnp_id = self.current_friendly_name = None
        self._main_id = self._carda_id = self._cardb_id = None
        self.dev = self.filesystem_cache = None
    @synchronous
    def eject(self):
        # User-requested eject: remember the PnP id so the device is ignored
        # until it is unplugged, then reset all connection state.
        if self.currently_connected_pnp_id is None: return
        self.ejected_devices.add(self.currently_connected_pnp_id)
        self.currently_connected_pnp_id = self.current_friendly_name = None
        self._main_id = self._carda_id = self._cardb_id = None
        self.dev = self.filesystem_cache = None
    @synchronous
    def open(self, connected_device, library_uuid):
        # Open the device identified by PnP id connected_device. One retry
        # after 2s, since WPD opens can fail transiently right after plug-in.
        # A device that cannot be opened, or that exposes no read-write
        # storage, is blacklisted for this session. Raises OpenFailed.
        self.dev = self.filesystem_cache = None
        try:
            self.dev = self.wpd.Device(connected_device)
        except self.wpd.WPDError:
            time.sleep(2)
            try:
                self.dev = self.wpd.Device(connected_device)
            except self.wpd.WPDError as e:
                self.blacklisted_devices.add(connected_device)
                raise OpenFailed('Failed to open %s with error: %s'%(
                    connected_device, as_unicode(e)))
        devdata = self.dev.data
        # Only read-write storages are usable; order determines the mapping
        # to main memory / card A / card B
        storage = [s for s in devdata.get('storage', []) if s.get('rw', False)]
        if not storage:
            self.blacklisted_devices.add(connected_device)
            raise OpenFailed('No storage found for device %s'%(connected_device,))
        self._main_id = storage[0]['id']
        if len(storage) > 1:
            self._carda_id = storage[1]['id']
        if len(storage) > 2:
            self._cardb_id = storage[2]['id']
        self.current_friendly_name = devdata.get('friendly_name', None)
    @synchronous
    def get_device_information(self, end_session=True):
        # Returns (name, version, sw version, mime type) as expected by the
        # device interface; device_version is reused for both version slots.
        d = self.dev.data
        dv = d.get('device_version', '')
        return (self.current_friendly_name, dv, dv, '')
    @synchronous
    def card_prefix(self, end_session=True):
        # Path prefixes for the two card storages ('mtp:::<id>:::'), or None
        # for a slot that does not exist.
        ans = [None, None]
        if self._carda_id is not None:
            ans[0] = 'mtp:::%s:::'%self._carda_id
        if self._cardb_id is not None:
            ans[1] = 'mtp:::%s:::'%self._cardb_id
        return tuple(ans)
    @synchronous
    def total_space(self, end_session=True):
        # Total capacity in bytes for (main, card A, card B), 0 for
        # missing storages. Uses cached device data.
        ans = [0, 0, 0]
        dd = self.dev.data
        for s in dd.get('storage', []):
            i = {self._main_id:0, self._carda_id:1,
                    self._cardb_id:2}.get(s.get('id', -1), None)
            if i is not None:
                ans[i] = s['capacity']
        return tuple(ans)
    @synchronous
    def free_space(self, end_session=True):
        # Free space in bytes for (main, card A, card B). Refreshes the
        # device data first so the numbers are current.
        self.dev.update_data()
        ans = [0, 0, 0]
        dd = self.dev.data
        for s in dd.get('storage', []):
            i = {self._main_id:0, self._carda_id:1,
                    self._cardb_id:2}.get(s.get('id', -1), None)
            if i is not None:
                ans[i] = s['free_space']
        return tuple(ans)

View File

@ -2,7 +2,7 @@
* global.h
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#pragma once
@ -34,6 +34,18 @@ typedef struct {
} ClientInfo;
extern ClientInfo client_info;
// Device type
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
wchar_t *pnp_id;
IPortableDeviceValues *client_information;
IPortableDevice *device;
PyObject *device_information;
} Device;
extern PyTypeObject DeviceType;
// Utility functions
PyObject *hresult_set_exc(const char *msg, HRESULT hr);
wchar_t *unicode_to_wchar(PyObject *o);

View File

@ -7,39 +7,70 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import subprocess, sys, os
import subprocess, sys, os, pprint, signal, time, glob
pprint
def build():
def build(mod='wpd'):
master = subprocess.Popen('ssh -MN getafix'.split())
master2 = subprocess.Popen('ssh -MN xp_build'.split())
try:
while not glob.glob(os.path.expanduser('~/.ssh/*kovid@xp_build*')):
time.sleep(0.05)
builder = subprocess.Popen('ssh xp_build ~/build-wpd'.split())
syncer = subprocess.Popen('ssh getafix ~/test-wpd'.split())
if builder.wait() != 0:
raise Exception('Failed to build plugin')
while not glob.glob(os.path.expanduser('~/.ssh/*kovid@getafix*')):
time.sleep(0.05)
syncer = subprocess.Popen('ssh getafix ~/test-wpd'.split())
if syncer.wait() != 0:
raise Exception('Failed to rsync to getafix')
subprocess.check_call(
'scp xp_build:build/calibre/src/calibre/plugins/wpd.pyd /tmp'.split())
('scp xp_build:build/calibre/src/calibre/plugins/%s.pyd /tmp'%mod).split())
subprocess.check_call(
'scp /tmp/wpd.pyd getafix:calibre/src/calibre/devices/mtp/windows'.split())
('scp /tmp/%s.pyd getafix:calibre/src/calibre/devices/mtp/windows'%mod).split())
p = subprocess.Popen(
'ssh getafix calibre-debug -e calibre/src/calibre/devices/mtp/windows/remote.py'.split())
p.wait()
print()
finally:
for m in (master2, master):
m.send_signal(signal.SIGHUP)
for m in (master2, master):
m.wait()
def main():
import pprint
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
fp, d = os.path.abspath(__file__), os.path.dirname
if b'CALIBRE_DEVELOP_FROM' not in os.environ:
env = os.environ.copy()
env[b'CALIBRE_DEVELOP_FROM'] = bytes(d(d(d(d(d(fp))))))
subprocess.call(['calibre-debug', '-e', fp], env=env)
return
sys.path.insert(0, os.path.dirname(fp))
if 'wpd' in sys.modules:
del sys.modules['wpd']
import wpd
from calibre.constants import plugins
plugins._plugins['wpd'] = (wpd, '')
sys.path.pop(0)
wpd.init('calibre', 1, 0, 0)
from calibre.devices.scanner import win_scanner
from calibre.devices.mtp.windows.driver import MTP_DEVICE
dev = MTP_DEVICE(None)
dev.startup()
print (dev.wpd, dev.wpd_error)
try:
for pnp_id in wpd.enumerate_devices():
print (pnp_id)
pprint.pprint(wpd.device_info(pnp_id))
devices = win_scanner()
pnp_id = dev.detect_managed_devices(devices)
# pprint.pprint(dev.detected_devices)
print ('Trying to connect to:', pnp_id)
dev.open(pnp_id, '')
print ('Connected to:', dev.get_gui_name())
print ('Total space', dev.total_space())
print ('Free space', dev.free_space())
finally:
wpd.uninit()
dev.shutdown()
if __name__ == '__main__':
main()

View File

@ -2,7 +2,7 @@
* utils.cpp
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
@ -33,13 +33,12 @@ PyObject *wpd::hresult_set_exc(const char *msg, HRESULT hr) {
wchar_t *wpd::unicode_to_wchar(PyObject *o) {
wchar_t *buf;
Py_ssize_t len;
if (!PyUnicode_Check(o)) {PyErr_Format(PyExc_TypeError, "The pnp id must be a unicode object"); return NULL;}
if (!PyUnicode_Check(o)) {PyErr_Format(PyExc_TypeError, "The python object must be a unicode object"); return NULL;}
len = PyUnicode_GET_SIZE(o);
if (len < 1) {PyErr_Format(PyExc_TypeError, "The pnp id must not be empty."); return NULL;}
buf = (wchar_t *)calloc(len+2, sizeof(wchar_t));
if (buf == NULL) { PyErr_NoMemory(); return NULL; }
len = PyUnicode_AsWideChar((PyUnicodeObject*)o, buf, len);
if (len == -1) { free(buf); PyErr_Format(PyExc_TypeError, "Invalid pnp id."); return NULL; }
if (len == -1) { free(buf); PyErr_Format(PyExc_TypeError, "Invalid python unicode object."); return NULL; }
return buf;
}

View File

@ -2,7 +2,7 @@
* mtp.c
* Copyright (C) 2012 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the MIT license.
* Distributed under terms of the GPL3 license.
*/
#include "global.h"
@ -92,14 +92,10 @@ wpd_enumerate_devices(PyObject *self, PyObject *args) {
ENSURE_WPD(NULL);
if (!PyArg_ParseTuple(args, "|O", &refresh)) return NULL;
if (refresh != NULL && PyObject_IsTrue(refresh)) {
Py_BEGIN_ALLOW_THREADS;
hr = portable_device_manager->RefreshDeviceList();
Py_END_ALLOW_THREADS;
if (FAILED(hr)) return hresult_set_exc("Failed to refresh the list of portable devices", hr);
}
hr = portable_device_manager->GetDevices(NULL, &num_of_devices);
num_of_devices += 15; // Incase new devices were connected between this call and the next
@ -148,6 +144,7 @@ wpd_device_info(PyObject *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "O", &py_pnp_id)) return NULL;
pnp_id = unicode_to_wchar(py_pnp_id);
if (wcslen(pnp_id) < 1) { PyErr_SetString(WPDError, "The PNP id must not be empty."); return NULL; }
if (pnp_id == NULL) return NULL;
client_information = get_client_information();
@ -174,7 +171,7 @@ static PyMethodDef wpd_methods[] = {
},
{"enumerate_devices", wpd_enumerate_devices, METH_VARARGS,
"enumerate_devices(refresh=False)\n\n Get the list of device PnP ids for all connected devices recognized by the WPD service. The result is cached, unless refresh=True. Do not call with refresh=True too often as it is resource intensive."
"enumerate_devices()\n\n Get the list of device PnP ids for all connected devices recognized by the WPD service. Do not call too often as it is resource intensive."
},
{"device_info", wpd_device_info, METH_VARARGS,
@ -189,6 +186,10 @@ PyMODINIT_FUNC
initwpd(void) {
PyObject *m;
wpd::DeviceType.tp_new = PyType_GenericNew;
if (PyType_Ready(&wpd::DeviceType) < 0)
return;
m = Py_InitModule3("wpd", wpd_methods, "Interface to the WPD windows service.");
if (m == NULL) return;
@ -197,6 +198,10 @@ initwpd(void) {
NoWPD = PyErr_NewException("wpd.NoWPD", NULL, NULL);
if (NoWPD == NULL) return;
Py_INCREF(&DeviceType);
PyModule_AddObject(m, "Device", (PyObject *)&DeviceType);
}

View File

@ -193,7 +193,11 @@ class PRST1(USBMS):
time_offsets = {}
for i, row in enumerate(cursor):
try:
comp_date = int(os.path.getmtime(self.normalize_path(prefix + row[0])) * 1000);
except (OSError, IOError):
# In case the db has incorrect path info
continue
device_date = int(row[1]);
offset = device_date - comp_date
time_offsets.setdefault(offset, 0)

View File

@ -10,7 +10,8 @@ from threading import RLock
from collections import namedtuple
from calibre import prints, as_unicode
from calibre.constants import iswindows, isosx, plugins, islinux, isfreebsd
from calibre.constants import (iswindows, isosx, plugins, islinux, isfreebsd,
isnetbsd)
osx_scanner = win_scanner = linux_scanner = None
@ -253,13 +254,18 @@ freebsd_scanner = None
if isfreebsd:
freebsd_scanner = FreeBSDScanner()
netbsd_scanner = None
''' NetBSD support currently not written yet '''
if isnetbsd:
netbsd_scanner = None
class DeviceScanner(object):
def __init__(self, *args):
if isosx and osx_scanner is None:
raise RuntimeError('The Python extension usbobserver must be available on OS X.')
self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else linux_scanner
self.scanner = win_scanner if iswindows else osx_scanner if isosx else freebsd_scanner if isfreebsd else netbsd_scanner if isnetbsd else linux_scanner
self.devices = []
def scan(self):

View File

@ -11,11 +11,12 @@ import socket, select, json, inspect, os, traceback, time, sys, random
import hashlib, threading
from base64 import b64encode, b64decode
from functools import wraps
from errno import EAGAIN, EINTR
from calibre import prints
from calibre.constants import numeric_version, DEBUG
from calibre.devices.errors import (OpenFailed, ControlError, TimeoutError,
InitialConnectionError)
InitialConnectionError, PacketError)
from calibre.devices.interface import DevicePlugin
from calibre.devices.usbms.books import Book, CollectionsBookList
from calibre.devices.usbms.deviceconfig import DeviceConfig
@ -85,6 +86,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
MAX_CLIENT_COMM_TIMEOUT = 60.0 # Wait at most N seconds for an answer
MAX_UNSUCCESSFUL_CONNECTS = 5
SEND_NOOP_EVERY_NTH_PROBE = 5
DISCONNECT_AFTER_N_SECONDS = 30*60 # 30 minutes
opcodes = {
'NOOP' : 12,
'OK' : 0,
@ -120,7 +124,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
_('Use fixed network port') + ':::<p>' +
_('If checked, use the port number in the "Port" box, otherwise '
'the driver will pick a random port') + '</p>',
_('Port') + ':::<p>' +
_('Port number: ') + ':::<p>' +
_('Enter the port number the driver is to use if the "fixed port" box is checked') + '</p>',
_('Print extra debug information') + ':::<p>' +
_('Check this box if requested when reporting problems') + '</p>',
@ -131,7 +135,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
_('. Two special collections are available: %(abt)s:%(abtv)s and %(aba)s:%(abav)s. Add '
'these values to the list to enable them. The collections will be '
'given the name provided after the ":" character.')%dict(
abt='abt', abtv=ALL_BY_TITLE, aba='aba', abav=ALL_BY_AUTHOR)
abt='abt', abtv=ALL_BY_TITLE, aba='aba', abav=ALL_BY_AUTHOR),
'',
_('Enable the no-activity timeout') + ':::<p>' +
_('If this box is checked, calibre will automatically disconnect if '
'a connected device does nothing for %d minutes. Unchecking this '
' box disables this timeout, so calibre will never automatically '
'disconnect.')%(DISCONNECT_AFTER_N_SECONDS/60,) + '</p>',
]
EXTRA_CUSTOMIZATION_DEFAULT = [
False,
@ -141,7 +151,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
False, '9090',
False,
'',
''
'',
'',
True,
]
OPT_AUTOSTART = 0
OPT_PASSWORD = 2
@ -149,6 +161,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
OPT_PORT_NUMBER = 5
OPT_EXTRA_DEBUG = 6
OPT_COLLECTIONS = 8
OPT_AUTODISCONNECT = 10
def __init__(self, path):
self.sync_lock = threading.RLock()
@ -165,6 +178,15 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
inspect.stack()[1][3]), end='')
for a in args:
try:
if isinstance(a, dict):
printable = {}
for k,v in a.iteritems():
if isinstance(v, (str, unicode)) and len(v) > 50:
printable[k] = 'too long'
else:
printable[k] = v
prints('', printable, end='');
else:
prints('', a, end='')
except:
prints('', 'value too long', end='')
@ -339,6 +361,27 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
pos += len(v)
return data
def _send_byte_string(self, s):
if not isinstance(s, bytes):
self._debug('given a non-byte string!')
raise PacketError("Internal error: found a string that isn't bytes")
sent_len = 0;
total_len = len(s)
while sent_len < total_len:
try:
if sent_len == 0:
amt_sent = self.device_socket.send(s)
else:
amt_sent = self.device_socket.send(s[sent_len:])
if amt_sent <= 0:
raise IOError('Bad write on device socket');
sent_len += amt_sent
except socket.error as e:
self._debug('socket error', e, e.errno)
if e.args[0] != EAGAIN and e.args[0] != EINTR:
raise
time.sleep(0.1) # lets not hammer the OS too hard
def _call_client(self, op, arg, print_debug_info=True):
if op != 'NOOP':
self.noop_counter = 0
@ -355,9 +398,9 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if print_debug_info and extra_debug:
self._debug('send string', s)
self.device_socket.settimeout(self.MAX_CLIENT_COMM_TIMEOUT)
self.device_socket.sendall(('%d' % len(s))+s)
self.device_socket.settimeout(None)
self._send_byte_string((b'%d' % len(s))+s)
v = self._read_string_from_net()
self.device_socket.settimeout(None)
if print_debug_info and extra_debug:
self._debug('received string', v)
if v:
@ -373,13 +416,13 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
except socket.error:
self._debug('device went away')
self._close_device_socket()
raise ControlError('Device closed the network connection')
raise ControlError(desc='Device closed the network connection')
except:
self._debug('other exception')
traceback.print_exc()
self._close_device_socket()
raise
raise ControlError('Device responded with incorrect information')
raise ControlError(desc='Device responded with incorrect information')
# Write a file as a series of base64-encoded strings.
def _put_file(self, infile, lpath, book_metadata, this_book, total_books):
@ -475,7 +518,8 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self.is_connected = False
if self.is_connected:
self.noop_counter += 1
if only_presence and (self.noop_counter % 5) != 1:
if only_presence and (
self.noop_counter % self.SEND_NOOP_EVERY_NTH_PROBE) != 1:
try:
ans = select.select((self.device_socket,), (), (), 0)
if len(ans[0]) == 0:
@ -486,6 +530,11 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
# This will usually toss an exception if the socket is gone.
except:
pass
if (self.settings().extra_customization[self.OPT_AUTODISCONNECT] and
self.noop_counter > self.DISCONNECT_AFTER_N_SECONDS):
self._close_device_socket()
self._debug('timeout -- disconnected')
else:
try:
if self._call_client('NOOP', dict())[0] is None:
self._close_device_socket()
@ -533,7 +582,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug()
if not self.is_connected:
# We have been called to retry the connection. Give up immediately
raise ControlError('Attempt to open a closed device')
raise ControlError(desc='Attempt to open a closed device')
self.current_library_uuid = library_uuid
self.current_library_name = current_library_name()
try:
@ -569,6 +618,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._debug('Protocol error - bogus book packet length')
self._close_device_socket()
return False
self._debug('CC version #:', result.get('ccVersionNumber', 'unknown'))
self.max_book_packet_len = result.get('maxBookContentPacketLen',
self.BASE_PACKET_LEN)
exts = result.get('acceptedExtensions', None)
@ -689,7 +739,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
self._set_known_metadata(book)
bl.add_book(book, replace_metadata=True)
else:
raise ControlError('book metadata not returned')
raise ControlError(desc='book metadata not returned')
return bl
@synchronous('sync_lock')
@ -720,7 +770,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
print_debug_info=False)
if opcode != 'OK':
self._debug('protocol error', opcode, i)
raise ControlError('sync_booklists')
raise ControlError(desc='sync_booklists')
@synchronous('sync_lock')
def eject(self):
@ -748,7 +798,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
book = Book(self.PREFIX, lpath, other=mdata)
length = self._put_file(infile, lpath, book, i, len(files))
if length < 0:
raise ControlError('Sending book %s to device failed' % lpath)
raise ControlError(desc='Sending book %s to device failed' % lpath)
paths.append((lpath, length))
# No need to deal with covers. The client will get the thumbnails
# in the mi structure
@ -789,7 +839,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
if opcode == 'OK':
self._debug('removed book with UUID', result['uuid'])
else:
raise ControlError('Protocol error - delete books')
raise ControlError(desc='Protocol error - delete books')
@synchronous('sync_lock')
def remove_books_from_metadata(self, paths, booklists):
@ -825,7 +875,7 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
else:
eof = True
else:
raise ControlError('request for book data failed')
raise ControlError(desc='request for book data failed')
@synchronous('sync_lock')
def set_plugboards(self, plugboards, pb_func):

View File

@ -88,6 +88,15 @@ class MOBIOutput(OutputFormatPlugin):
'formats. This option tells calibre not to do this. '
'Useful if your document contains lots of GIF/PNG images that '
'become very large when converted to JPEG.')),
OptionRecommendation(name='mobi_file_type', choices=['old', 'both',
'new'], recommended_value='old',
help=_('By default calibre generates MOBI files that contain the '
'old MOBI 6 format. This format is compatible with all '
'devices. However, by changing this setting, you can tell '
'calibre to generate MOBI files that contain both MOBI 6 and '
'the new KF8 format, or only the new KF8 format. KF8 has '
'more features than MOBI 6, but only works with newer Kindles.')),
])
def check_for_periodical(self):
@ -165,11 +174,10 @@ class MOBIOutput(OutputFormatPlugin):
toc.nodes[0].href = toc.nodes[0].nodes[0].href
def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.utils.config import tweaks
from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb
mobi_type = tweaks.get('test_mobi_output_type', 'old')
mobi_type = opts.mobi_file_type
if self.is_periodical:
mobi_type = 'old' # Amazon does not support KF8 periodicals
create_kf8 = mobi_type in ('new', 'both')

View File

@ -11,6 +11,7 @@ from collections import defaultdict
from lxml import etree
import cssutils
from cssutils.css import Property
from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath)
@ -276,10 +277,16 @@ class CSSFlattener(object):
cssdict['font-family'] = node.attrib['face']
del node.attrib['face']
if 'color' in node.attrib:
cssdict['color'] = node.attrib['color']
try:
cssdict['color'] = Property('color', node.attrib['color']).value
except ValueError:
pass
del node.attrib['color']
if 'bgcolor' in node.attrib:
cssdict['background-color'] = node.attrib['bgcolor']
try:
cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
except ValueError:
pass
del node.attrib['bgcolor']
if cssdict.get('font-weight', '').lower() == 'medium':
cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium

View File

@ -15,13 +15,15 @@ from calibre.utils.icu import sort_key
from catalog_epub_mobi_ui import Ui_Form
from PyQt4.Qt import (Qt, QAbstractItemView, QCheckBox, QComboBox,
QDoubleSpinBox, QIcon, QLineEdit, QRadioButton, QSize, QSizePolicy,
QTableWidget, QTableWidgetItem, QToolButton, QVBoxLayout, QWidget)
QDoubleSpinBox, QIcon, QLineEdit, QObject, QRadioButton, QSize, QSizePolicy,
QTableWidget, QTableWidgetItem, QToolButton, QVBoxLayout, QWidget,
SIGNAL)
class PluginWidget(QWidget,Ui_Form):
TITLE = _('E-book options')
HELP = _('Options specific to')+' AZW3/EPUB/MOBI '+_('output')
DEBUG = False
# Output synced to the connected device?
sync_enabled = True
@ -100,6 +102,39 @@ class PluginWidget(QWidget,Ui_Form):
self.OPTION_FIELDS = option_fields
def construct_tw_opts_object(self, c_name, opt_value, opts_dict):
'''
Build an opts object from the UI settings to pass to the catalog builder
Handles two types of rules sets, with and without ['prefix'] field
Store processed opts object to opt_dict
'''
rule_set = []
for stored_rule in opt_value:
rule = copy(stored_rule)
# Skip disabled and incomplete rules
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern']:
continue
elif 'prefix' in rule and not rule['prefix']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column friendly name
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'] in [_('any value'),_('any date')]:
rule_pattern = '.*'
elif rule['pattern'] == _('unspecified'):
rule['pattern'] = 'None'
if 'prefix' in rule:
pr = (rule['name'],rule['field'],rule['pattern'],rule['prefix'])
else:
pr = (rule['name'],rule['field'],rule['pattern'])
rule_set.append(pr)
opt_value = tuple(rule_set)
# Strip off the trailing '_tw'
opts_dict[c_name[:-3]] = opt_value
def fetchEligibleCustomFields(self):
self.all_custom_fields = self.db.custom_field_keys()
custom_fields = {}
@ -194,11 +229,10 @@ class PluginWidget(QWidget,Ui_Form):
def options(self):
# Save/return the current options
# exclude_genre stores literally
# generate_titles, generate_recently_added store as True/False
# Section switches store as True/False
# others store as lists
opts_dict = {}
# Save values to gprefs
prefix_rules_processed = False
exclusion_rules_processed = False
@ -229,56 +263,8 @@ class PluginWidget(QWidget,Ui_Form):
gprefs.set(self.name + '_' + c_name, opt_value)
# Construct opts object for catalog builder
if c_name == 'prefix_rules_tw':
rule_set = []
for stored_rule in opt_value:
# Test for empty name/field/pattern/prefix, continue
# If pattern = any or unspecified, convert to regex
rule = copy(stored_rule)
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern'] or not rule['prefix']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column name
#print(self.eligible_custom_fields[rule['field']]['field'])
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'].startswith('any'):
rule['pattern'] = '.*'
elif rule['pattern'] == 'unspecified':
rule['pattern'] = 'None'
pr = (rule['name'],rule['field'],rule['pattern'],rule['prefix'])
rule_set.append(pr)
opt_value = tuple(rule_set)
opts_dict['prefix_rules'] = opt_value
elif c_name == 'exclusion_rules_tw':
rule_set = []
for stored_rule in opt_value:
# Test for empty name/field/pattern/prefix, continue
# If pattern = any or unspecified, convert to regex
rule = copy(stored_rule)
if not rule['enabled']:
continue
elif not rule['field'] or not rule['pattern']:
continue
else:
if rule['field'] != 'Tags':
# Look up custom column name
#print(self.eligible_custom_fields[rule['field']]['field'])
rule['field'] = self.eligible_custom_fields[rule['field']]['field']
if rule['pattern'].startswith('any'):
rule['pattern'] = '.*'
elif rule['pattern'] == 'unspecified':
rule['pattern'] = 'None'
pr = (rule['name'],rule['field'],rule['pattern'])
rule_set.append(pr)
opt_value = tuple(rule_set)
opts_dict['exclusion_rules'] = opt_value
if c_name in ['exclusion_rules_tw','prefix_rules_tw']:
self.construct_tw_opts_object(c_name, opt_value, opts_dict)
else:
opts_dict[c_name] = opt_value
@ -299,7 +285,7 @@ class PluginWidget(QWidget,Ui_Form):
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
except:
opts_dict['output_profile'] = ['default']
if False:
if self.DEBUG:
print "opts_dict"
for opt in sorted(opts_dict.keys(), key=sort_key):
print " %s: %s" % (opt, repr(opts_dict[opt]))
@ -343,7 +329,6 @@ class PluginWidget(QWidget,Ui_Form):
self.header_note_source_fields = custom_fields
self.header_note_source_field.currentIndexChanged.connect(self.header_note_source_field_changed)
# Populate the 'Merge with Comments' combo box
custom_fields = {}
for custom_field in self.all_custom_fields:
@ -450,10 +435,11 @@ class ComboBox(NoWheelComboBox):
class GenericRulesTable(QTableWidget):
'''
Generic methods for managing rows
Add QTableWidget, controls to parent QGroupBox
placeholders for basic methods to be overriden
Generic methods for managing rows in a QTableWidget
'''
DEBUG = False
MAXIMUM_TABLE_HEIGHT = 113
NAME_FIELD_WIDTH = 225
def __init__(self, parent_gb, object_name, rules, eligible_custom_fields, db):
self.rules = rules
@ -464,13 +450,12 @@ class GenericRulesTable(QTableWidget):
self.layout = parent_gb.layout()
# Add ourselves to the layout
#print("verticalHeader: %s" % dir(self.verticalHeader()))
sizePolicy = QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Minimum)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
#sizePolicy.setHeightForWidth(self.sizePolicy().hasHeightForWidth())
self.setSizePolicy(sizePolicy)
self.setMaximumSize(QSize(16777215, 113))
self.setMaximumSize(QSize(16777215, self.MAXIMUM_TABLE_HEIGHT))
self.setColumnCount(0)
self.setRowCount(0)
@ -481,6 +466,9 @@ class GenericRulesTable(QTableWidget):
self._init_controls()
# Hook check_box changes
QObject.connect(self, SIGNAL('cellChanged(int,int)'), self.enabled_state_changed)
def _init_controls(self):
# Add the control set
vbl = QVBoxLayout()
@ -517,6 +505,8 @@ class GenericRulesTable(QTableWidget):
def add_row(self):
self.setFocus()
row = self.last_row_selected + 1
if self.DEBUG:
print("%s:add_row(): at row: %d" % (self.objectName(), row))
self.insertRow(row)
self.populate_table_row(row, self.create_blank_row_data())
self.select_and_scroll_to_row(row)
@ -524,19 +514,10 @@ class GenericRulesTable(QTableWidget):
# In case table was empty
self.horizontalHeader().setStretchLastSection(True)
def convert_row_to_data(self):
'''
override
'''
pass
def create_blank_row_data(self):
'''
override
'''
pass
def delete_row(self):
if self.DEBUG:
print("%s:delete_row()" % self.objectName())
self.setFocus()
rows = self.last_rows_selected
if len(rows) == 0:
@ -545,10 +526,11 @@ class GenericRulesTable(QTableWidget):
first = rows[0].row() + 1
last = rows[-1].row() + 1
message = _('Are you sure you want to delete rule %d?') % first
first_rule_name = unicode(self.cellWidget(first-1,self.COLUMNS['NAME']['ordinal']).text()).strip()
message = _("Are you sure you want to delete '%s'?") % (first_rule_name)
if len(rows) > 1:
message = _('Are you sure you want to delete rules %d-%d?') % (first, last)
if not question_dialog(self, _('Are you sure?'), message, show_copy_button=False):
message = _('Are you sure you want to delete rules #%d-%d?') % (first, last)
if not question_dialog(self, _('Delete Rule'), message, show_copy_button=False):
return
first_sel_row = self.currentRow()
for selrow in reversed(rows):
@ -558,17 +540,24 @@ class GenericRulesTable(QTableWidget):
elif self.rowCount() > 0:
self.select_and_scroll_to_row(first_sel_row - 1)
def enabled_state_changed(self, row, col):
if col in [self.COLUMNS['ENABLED']['ordinal']]:
self.select_and_scroll_to_row(row)
if self.DEBUG:
print("%s:enabled_state_changed(): row %d col %d" %
(self.objectName(), row, col))
def focusInEvent(self,e):
if self.DEBUG:
print("%s:focusInEvent()" % self.objectName())
def focusOutEvent(self,e):
# Override of QTableWidget method - clear selection when table loses focus
self.last_row_selected = self.currentRow()
self.last_rows_selected = self.selectionModel().selectedRows()
self.clearSelection()
def get_data(self):
'''
override
'''
pass
if self.DEBUG:
print("%s:focusOutEvent(): self.last_row_selected: %d" % (self.objectName(),self.last_row_selected))
def move_row_down(self):
self.setFocus()
@ -583,6 +572,8 @@ class GenericRulesTable(QTableWidget):
for selrow in reversed(rows):
dest_row = selrow.row() + 1
src_row = selrow.row()
if self.DEBUG:
print("%s:move_row_down() %d -> %d" % (self.objectName(),src_row, dest_row))
# Save the contents of the destination row
saved_data = self.convert_row_to_data(dest_row)
@ -596,11 +587,9 @@ class GenericRulesTable(QTableWidget):
# Populate it with the saved data
self.populate_table_row(src_row, saved_data)
self.blockSignals(False)
scroll_to_row = last_sel_row + 1
if scroll_to_row < self.rowCount() - 1:
scroll_to_row = scroll_to_row + 1
self.select_and_scroll_to_row(scroll_to_row)
self.blockSignals(False)
def move_row_up(self):
self.setFocus()
@ -611,7 +600,11 @@ class GenericRulesTable(QTableWidget):
if first_sel_row <= 0:
return
self.blockSignals(True)
for selrow in rows:
if self.DEBUG:
print("%s:move_row_up() %d -> %d" % (self.objectName(),selrow.row(), selrow.row()-1))
# Save the row above
saved_data = self.convert_row_to_data(selrow.row() - 1)
@ -621,33 +614,92 @@ class GenericRulesTable(QTableWidget):
# Delete the row above
self.removeRow(selrow.row() - 1)
self.blockSignals(False)
scroll_to_row = first_sel_row - 1
scroll_to_row = first_sel_row
if scroll_to_row > 0:
scroll_to_row = scroll_to_row - 1
self.select_and_scroll_to_row(scroll_to_row)
self.blockSignals(False)
def populate_table_row(self):
'''
override
'''
pass
def populate_table(self):
# Format of rules list is different if default values vs retrieved JSON
# Hack to normalize list style
rules = self.rules
if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def resize_name(self, scale):
#current_width = self.columnWidth(1)
#self.setColumnWidth(1, min(225,int(current_width * scale)))
self.setColumnWidth(1, 225)
def resize_name(self):
self.setColumnWidth(1, self.NAME_FIELD_WIDTH)
def rule_name_edited(self):
if self.DEBUG:
print("%s:rule_name_edited()" % self.objectName())
current_row = self.currentRow()
self.cellWidget(current_row,1).home(False)
self.setFocus()
self.select_and_scroll_to_row(current_row)
def select_and_scroll_to_row(self, row):
self.setFocus()
self.selectRow(row)
self.scrollToItem(self.currentItem())
self.last_row_selected = self.currentRow()
self.last_rows_selected = self.selectionModel().selectedRows()
def _source_index_changed(self, combo):
# Figure out which row we're in
for row in range(self.rowCount()):
if self.cellWidget(row, self.COLUMNS['FIELD']['ordinal']) is combo:
break
if self.DEBUG:
print("%s:_source_index_changed(): calling source_index_changed with row: %d " %
(self.objectName(), row))
self.source_index_changed(combo, row)
def source_index_changed(self, combo, row, pattern=''):
# Populate the Pattern field based upon the Source field
source_field = str(combo.currentText())
if source_field == '':
values = []
elif source_field == 'Tags':
values = sorted(self.db.all_tags(), key=sort_key)
else:
if self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['enumeration', 'text']:
values = self.db.all_custom(self.db.field_metadata.key_to_label(
self.eligible_custom_fields[unicode(source_field)]['field']))
values = sorted(values, key=sort_key)
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['bool']:
values = [_('True'),_('False'),_('unspecified')]
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['composite']:
values = [_('any value'),_('unspecified')]
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['datetime']:
values = [_('any date'),_('unspecified')]
values_combo = ComboBox(self, values, pattern)
values_combo.currentIndexChanged.connect(partial(self.values_index_changed, values_combo))
self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)
self.select_and_scroll_to_row(row)
def values_index_changed(self, combo):
# After edit, select row
for row in range(self.rowCount()):
if self.cellWidget(row, self.COLUMNS['PATTERN']['ordinal']) is combo:
self.select_and_scroll_to_row(row)
break
if self.DEBUG:
print("%s:values_index_changed(): row %d " %
(self.objectName(), row))
class ExclusionRules(GenericRulesTable):
@ -658,6 +710,7 @@ class ExclusionRules(GenericRulesTable):
def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db):
super(ExclusionRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db)
self.setObjectName("exclusion_rules_table")
self._init_table_widget()
self._initialize()
@ -672,7 +725,7 @@ class ExclusionRules(GenericRulesTable):
def _initialize(self):
self.populate_table()
self.resizeColumnsToContents()
self.resize_name(1.5)
self.resize_name()
self.horizontalHeader().setStretchLastSection(True)
self.clearSelection()
@ -706,20 +759,6 @@ class ExclusionRules(GenericRulesTable):
'pattern':data['pattern']})
return data_items
def populate_table(self):
# Format of rules list is different if default values vs retrieved JSON
# Hack to normalize list style
rules = self.rules
if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def populate_table_row(self, row, data):
def set_rule_name_in_row(row, col, name=''):
@ -730,7 +769,7 @@ class ExclusionRules(GenericRulesTable):
def set_source_field_in_row(row, col, field=''):
source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field)
source_combo.currentIndexChanged.connect(partial(self.source_index_changed, source_combo, row))
source_combo.currentIndexChanged.connect(partial(self._source_index_changed, source_combo))
self.setCellWidget(row, col, source_combo)
return source_combo
@ -738,7 +777,8 @@ class ExclusionRules(GenericRulesTable):
self.blockSignals(True)
# Enabled
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled']))
check_box = CheckableTableWidgetItem(data['enabled'])
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], check_box)
# Rule name
set_rule_name_in_row(row, self.COLUMNS['NAME']['ordinal'], name=data['name'])
@ -748,32 +788,10 @@ class ExclusionRules(GenericRulesTable):
# Pattern
# The contents of the Pattern field is driven by the Source field
self.source_index_changed(source_combo, row, self.COLUMNS['PATTERN']['ordinal'], pattern=data['pattern'])
self.source_index_changed(source_combo, row, pattern=data['pattern'])
self.blockSignals(False)
def source_index_changed(self, combo, row, col, pattern=''):
# Populate the Pattern field based upon the Source field
source_field = str(combo.currentText())
if source_field == '':
values = []
elif source_field == 'Tags':
values = sorted(self.db.all_tags(), key=sort_key)
else:
if self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['enumeration', 'text']:
values = self.db.all_custom(self.db.field_metadata.key_to_label(
self.eligible_custom_fields[unicode(source_field)]['field']))
values = sorted(values, key=sort_key)
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['bool']:
values = ['True','False','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['composite']:
values = ['any value','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['datetime']:
values = ['any date','unspecified']
values_combo = ComboBox(self, values, pattern)
self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)
class PrefixRules(GenericRulesTable):
COLUMNS = { 'ENABLED':{'ordinal': 0, 'name': ''},
@ -784,6 +802,7 @@ class PrefixRules(GenericRulesTable):
def __init__(self, parent_gb_hl, object_name, rules, eligible_custom_fields, db):
super(PrefixRules, self).__init__(parent_gb_hl, object_name, rules, eligible_custom_fields, db)
self.setObjectName("prefix_rules_table")
self._init_table_widget()
self._initialize()
@ -799,14 +818,14 @@ class PrefixRules(GenericRulesTable):
self.generate_prefix_list()
self.populate_table()
self.resizeColumnsToContents()
self.resize_name(1.5)
self.resize_name()
self.horizontalHeader().setStretchLastSection(True)
self.clearSelection()
def convert_row_to_data(self, row):
data = self.create_blank_row_data()
data['ordinal'] = row
data['enabled'] = self.item(row,0).checkState() == Qt.Checked
data['enabled'] = self.item(row,self.COLUMNS['ENABLED']['ordinal']).checkState() == Qt.Checked
data['name'] = unicode(self.cellWidget(row,self.COLUMNS['NAME']['ordinal']).text()).strip()
data['prefix'] = unicode(self.cellWidget(row,self.COLUMNS['PREFIX']['ordinal']).currentText()).strip()
data['field'] = unicode(self.cellWidget(row,self.COLUMNS['FIELD']['ordinal']).currentText()).strip()
@ -970,20 +989,6 @@ class PrefixRules(GenericRulesTable):
'prefix':data['prefix']})
return data_items
def populate_table(self):
# Format of rules list is different if default values vs retrieved JSON
# Hack to normalize list style
rules = self.rules
if rules and type(rules[0]) is list:
rules = rules[0]
self.setFocus()
rules = sorted(rules, key=lambda k: k['ordinal'])
for row, rule in enumerate(rules):
self.insertRow(row)
self.select_and_scroll_to_row(row)
self.populate_table_row(row, rule)
self.selectRow(0)
def populate_table_row(self, row, data):
def set_prefix_field_in_row(row, col, field=''):
@ -998,14 +1003,12 @@ class PrefixRules(GenericRulesTable):
def set_source_field_in_row(row, col, field=''):
source_combo = ComboBox(self, sorted(self.eligible_custom_fields.keys(), key=sort_key), field)
source_combo.currentIndexChanged.connect(partial(self.source_index_changed, source_combo, row))
source_combo.currentIndexChanged.connect(partial(self._source_index_changed, source_combo))
self.setCellWidget(row, col, source_combo)
return source_combo
# Entry point
self.blockSignals(True)
#print("prefix_rules_populate_table_row processing rule:\n%s\n" % data)
# Enabled
self.setItem(row, self.COLUMNS['ENABLED']['ordinal'], CheckableTableWidgetItem(data['enabled']))
@ -1021,31 +1024,7 @@ class PrefixRules(GenericRulesTable):
# Pattern
# The contents of the Pattern field is driven by the Source field
self.source_index_changed(source_combo, row, self.COLUMNS['PATTERN']['ordinal'], pattern=data['pattern'])
self.source_index_changed(source_combo, row, pattern=data['pattern'])
self.blockSignals(False)
def source_index_changed(self, combo, row, col, pattern=''):
# Populate the Pattern field based upon the Source field
# row, col are the control that changed
source_field = str(combo.currentText())
if source_field == '':
values = []
elif source_field == 'Tags':
values = sorted(self.db.all_tags(), key=sort_key)
else:
if self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['enumeration', 'text']:
values = self.db.all_custom(self.db.field_metadata.key_to_label(
self.eligible_custom_fields[unicode(source_field)]['field']))
values = sorted(values, key=sort_key)
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['bool']:
values = ['True','False','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['composite']:
values = ['any value','unspecified']
elif self.eligible_custom_fields[unicode(source_field)]['datatype'] in ['datetime']:
values = ['any date','unspecified']
values_combo = ComboBox(self, values, pattern)
self.setCellWidget(row, self.COLUMNS['PATTERN']['ordinal'], values_combo)

View File

@ -25,7 +25,7 @@ class PluginWidget(Widget, Ui_Form):
'mobi_keep_original_images',
'mobi_ignore_margins', 'mobi_toc_at_start',
'dont_compress', 'no_inline_toc', 'share_not_sync',
'personal_doc']#, 'mobi_navpoints_only_deepest']
'personal_doc', 'mobi_file_type']
)
self.db, self.book_id = db, book_id
@ -48,6 +48,7 @@ class PluginWidget(Widget, Ui_Form):
self.font_family_model = font_family_model
self.opt_masthead_font.setModel(self.font_family_model)
'''
self.opt_mobi_file_type.addItems(['old', 'both', 'new'])
self.initialize_options(get_option, get_help, db, book_id)

View File

@ -14,80 +14,10 @@
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="8" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Kindle options</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QLabel" name="label_3">
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">
<string>Personal Doc tag:</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="opt_personal_doc"/>
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="opt_share_not_sync">
<property name="text">
<string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
</property>
</widget>
</item>
<item>
<spacer name="verticalSpacer">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</item>
<item row="9" column="0">
<spacer name="verticalSpacer_2">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_mobi_toc_at_start">
<property name="text">
<string>Put generated Table of Contents at &amp;start of book instead of end</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QCheckBox" name="opt_mobi_ignore_margins">
<property name="text">
<string>Ignore &amp;margins</string>
</property>
</widget>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
<string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
@ -104,17 +34,24 @@
<item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_mobi_toc_at_start">
<property name="text">
<string>Disable compression of the file contents</string>
<string>Put generated Table of Contents at &amp;start of book instead of end</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<item row="3" column="0">
<widget class="QCheckBox" name="opt_mobi_ignore_margins">
<property name="text">
<string>Do not add Table of Contents to book</string>
<string>Ignore &amp;margins</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
</property>
</widget>
</item>
@ -125,6 +62,55 @@
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="7" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Kindle options</string>
</property>
<layout class="QFormLayout" name="formLayout">
<property name="fieldGrowthPolicy">
<enum>QFormLayout::ExpandingFieldsGrow</enum>
</property>
<item row="0" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
<string>MOBI file &amp;type:</string>
</property>
<property name="buddy">
<cstring>opt_mobi_file_type</cstring>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QComboBox" name="opt_mobi_file_type"/>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Personal Doc tag:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_personal_doc"/>
</item>
<item row="2" column="0" colspan="2">
<widget class="QCheckBox" name="opt_share_not_sync">
<property name="text">
<string>Enable sharing of book content via Facebook, etc. WARNING: Disables last read syncing</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
</layout>
</widget>
<resources/>

View File

@ -529,6 +529,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.remove_button.clicked.connect(self.s_r_remove_query)
self.queries = JSONConfig("search_replace_queries")
self.saved_search_name = ''
self.query_field.addItem("")
self.query_field_values = sorted([q for q in self.queries], key=sort_key)
self.query_field.addItems(self.query_field_values)
@ -1034,11 +1035,16 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.queries.commit()
def s_r_save_query(self, *args):
dex = self.query_field_values.index(self.saved_search_name)
names = ['']
names.extend(self.query_field_values)
try:
dex = names.index(self.saved_search_name)
except:
dex = 0
name = ''
while not name:
name, ok = QInputDialog.getItem(self, _('Save search/replace'),
_('Search/replace name:'), self.query_field_values, dex, True)
_('Search/replace name:'), names, dex, True)
if not ok:
return
if not name:
@ -1086,6 +1092,7 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
def s_r_query_change(self, item_name):
if not item_name:
self.s_r_reset_query_fields()
self.saved_search_name = ''
return
item = self.queries.get(unicode(item_name), None)
if item is None:

View File

@ -1241,17 +1241,18 @@ not multiple and the destination field is multiple</string>
<tabstop>search_mode</tabstop>
<tabstop>s_r_src_ident</tabstop>
<tabstop>s_r_template</tabstop>
<tabstop>search_for</tabstop>
<tabstop>case_sensitive</tabstop>
<tabstop>replace_with</tabstop>
<tabstop>replace_func</tabstop>
<tabstop>destination_field</tabstop>
<tabstop>replace_mode</tabstop>
<tabstop>comma_separated</tabstop>
<tabstop>s_r_dst_ident</tabstop>
<tabstop>results_count</tabstop>
<tabstop>scrollArea11</tabstop>
<tabstop>destination_field</tabstop>
<tabstop>search_for</tabstop>
<tabstop>case_sensitive</tabstop>
<tabstop>starting_from</tabstop>
<tabstop>multiple_separator</tabstop>
<tabstop>scrollArea11</tabstop>
</tabstops>
<resources>
<include location="../../../../resources/images.qrc"/>

View File

@ -310,8 +310,18 @@ class MetadataSingleDialogBase(ResizableDialog):
self.update_from_mi(mi)
def cover_from_format(self, *args):
try:
mi, ext = self.formats_manager.get_selected_format_metadata(self.db,
self.book_id)
except (IOError, OSError) as err:
if getattr(err, 'errno', None) == errno.EACCES: # Permission denied
import traceback
fname = err.filename if err.filename else 'file'
error_dialog(self, _('Permission denied'),
_('Could not open %s. Is it being used by another'
' program?')%fname, det_msg=traceback.format_exc(),
show=True)
return
if mi is None:
return
cdata = None

View File

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2012, Florent FAYOLLE <florent.fayolle69@gmail.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
from calibre.gui2.store.search_result import SearchResult
import unicodedata
#mimetypes.add_type('application/epub+zip', '.epub')
class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
open_search_url = 'http://www.ebooksgratuits.com/opds/opensearch.xml'
web_url = 'http://www.ebooksgratuits.com/'
def strip_accents(self, s):
return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
def search(self, query, max_results=10, timeout=60):
query = self.strip_accents(unicode(query))
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
if s.downloads:
s.drm = SearchResult.DRM_UNLOCKED
s.price = '$0.00'
yield s

View File

@ -417,7 +417,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
vprefs.set('viewer_splitter_state',
bytearray(self.splitter.saveState()))
vprefs['multiplier'] = self.view.multiplier
vprefs['in_paged_mode1'] = not self.action_toggle_paged_mode.isChecked()
vprefs['in_paged_mode'] = not self.action_toggle_paged_mode.isChecked()
def restore_state(self):
state = vprefs.get('viewer_toolbar_state', None)
@ -434,8 +434,8 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
# specific location, ensure they are visible.
self.tool_bar.setVisible(True)
self.tool_bar2.setVisible(True)
self.action_toggle_paged_mode.setChecked(not vprefs.get('in_paged_mode1',
False))
self.action_toggle_paged_mode.setChecked(not vprefs.get('in_paged_mode',
True))
self.toggle_paged_mode(self.action_toggle_paged_mode.isChecked(),
at_start=True)

View File

@ -440,8 +440,7 @@ class KindlePage(QWizardPage, KindleUI):
x = unicode(self.to_address.text()).strip()
parts = x.split('@')
if (self.send_email_widget.set_email_settings(True) and len(parts) >= 2
and parts[0]):
if (len(parts) >= 2 and parts[0] and self.send_email_widget.set_email_settings(True)):
conf = smtp_prefs()
accounts = conf.parse().accounts
if not accounts: accounts = {}
@ -676,8 +675,9 @@ class LibraryPage(QWizardPage, LibraryUI):
self.language.blockSignals(True)
self.language.clear()
from calibre.utils.localization import (available_translations,
get_language, get_lang)
get_language, get_lang, get_lc_messages_path)
lang = get_lang()
lang = get_lc_messages_path(lang) if lang else lang
if lang is None or lang not in available_translations():
lang = 'en'
def get_esc_lang(l):

View File

@ -341,7 +341,8 @@ class EPUB_MOBI(CatalogPlugin):
recommendations.append(('comments', '', OptionRecommendation.HIGH))
# >>> Use to debug generated catalog code before conversion <<<
#setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug"))
if False:
setattr(opts,'debug_pipeline',os.path.expanduser("~/Desktop/Catalog debug"))
dp = getattr(opts, 'debug_pipeline', None)
if dp is not None:

View File

@ -1188,11 +1188,11 @@ Author '{0}':
current_series = book['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series:
aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\s','',book['series']).lower())
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(book['series']))
aTag.insert(0, book['series'])
pSeriesTag.insert(0, aTag)
else:
@ -1331,10 +1331,13 @@ Author '{0}':
current_series = new_entry['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series:
aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\W','',new_entry['series']).lower())
if self.letter_or_symbol(new_entry['series']) == self.SYMBOLS:
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(new_entry['series']))
aTag.insert(0, new_entry['series'])
pSeriesTag.insert(0, aTag)
else:
@ -1741,17 +1744,6 @@ Author '{0}':
body = soup.find('body')
btc = 0
pTag = Tag(soup, "p")
pTag['style'] = 'display:none'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
body.insert(btc, pTag)
btc += 1
divTag = Tag(soup, "div")
dtc = 0
current_letter = ""
@ -1787,11 +1779,10 @@ Author '{0}':
current_series = book['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
aTag = Tag(soup, 'a')
if self.letter_or_symbol(book['series']):
aTag['id'] = "symbol_%s_series" % re.sub('\W','',book['series']).lower()
else:
aTag['id'] = "%s_series" % re.sub('\W','',book['series']).lower()
aTag['id'] = self.generateSeriesAnchor(book['series'])
pSeriesTag.insert(0,aTag)
pSeriesTag.insert(1,NavigableString('%s' % book['series']))
divTag.insert(dtc,pSeriesTag)
@ -1847,16 +1838,20 @@ Author '{0}':
divTag.insert(dtc, pBookTag)
dtc += 1
if not self.__generateForKindle:
# Insert the <h2> tag with book_count at the head
#<h2><a name="byseries" id="byseries"></a>By Series</h2>
pTag = Tag(soup, "p")
pTag['class'] = 'title'
ptc = 0
aTag = Tag(soup,'a')
aTag['id'] = 'section_start'
pTag.insert(ptc, aTag)
ptc += 1
if not self.__generateForKindle:
# Insert the <h2> tag with book_count at the head
aTag = Tag(soup, "a")
anchor_name = friendly_name.lower()
aTag['id'] = anchor_name.replace(" ","")
pTag.insert(0,aTag)
#h2Tag.insert(1,NavigableString('%s (%d)' % (friendly_name, series_count)))
pTag.insert(1,NavigableString('%s' % friendly_name))
body.insert(btc,pTag)
btc += 1
@ -3353,15 +3348,23 @@ Author '{0}':
return codeTag
else:
spanTag = Tag(soup, "span")
spanTag['class'] = "prefix"
# color:white was the original technique used to align columns.
# The new technique is to float the prefix left with CSS.
if prefix_char is None:
if True:
prefix_char = "&nbsp;"
else:
del spanTag['class']
spanTag['style'] = "color:white"
prefix_char = self.defaultPrefix
spanTag.insert(0,NavigableString(prefix_char))
return spanTag
def generateAuthorAnchor(self, author):
# Strip white space to ''
return re.sub("\W","", author)
# Generate a legal XHTML id/href string
return re.sub("\W","", ascii_text(author))
def generateFormatArgs(self, book):
series_index = str(book['series_index'])
@ -3436,10 +3439,11 @@ Author '{0}':
current_series = book['series']
pSeriesTag = Tag(soup,'p')
pSeriesTag['class'] = "series"
if self.opts.fmt == 'mobi':
pSeriesTag['class'] = "series_mobi"
if self.opts.generate_series:
aTag = Tag(soup,'a')
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\W','',book['series']).lower())
aTag['href'] = "%s.html#%s" % ('BySeries', self.generateSeriesAnchor(book['series']))
aTag.insert(0, book['series'])
pSeriesTag.insert(0, aTag)
else:
@ -3641,12 +3645,7 @@ Author '{0}':
if aTag:
if book['series']:
if self.opts.generate_series:
if self.letter_or_symbol(book['series']):
aTag['href'] = "%s.html#symbol_%s_series" % ('BySeries',
re.sub('\W','',book['series']).lower())
else:
aTag['href'] = "%s.html#%s_series" % ('BySeries',
re.sub('\s','',book['series']).lower())
aTag['href'] = "%s.html#%s" % ('BySeries',self.generateSeriesAnchor(book['series']))
else:
aTag.extract()
@ -3780,6 +3779,13 @@ Author '{0}':
pass
return rating
def generateSeriesAnchor(self, series):
# Generate a legal XHTML id/href string
if self.letter_or_symbol(series) == self.SYMBOLS:
return "symbol_%s_series" % re.sub('\W','',series).lower()
else:
return "%s_series" % re.sub('\W','',ascii_text(series)).lower()
def generateShortDescription(self, description, dest=None):
# Truncate the description, on word boundaries if necessary
# Possible destinations:

View File

@ -11,7 +11,7 @@ import os, sys, shutil, cStringIO, glob, time, functools, traceback, re, \
from collections import defaultdict
import threading, random
from itertools import repeat
from math import ceil
from math import ceil, floor
from calibre import prints, force_unicode
from calibre.ebooks.metadata import (title_sort, author_to_author_sort,
@ -640,12 +640,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
if name and name != fname:
changed = True
break
if path == current_path and not changed:
return
tpath = os.path.join(self.library_path, *path.split('/'))
if not os.path.exists(tpath):
os.makedirs(tpath)
if path == current_path and not changed:
return
spath = os.path.join(self.library_path, *current_path.split('/'))
if current_path and os.path.exists(spath): # Migrate existing files
@ -1150,7 +1150,16 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
`data`: Can be either a QImage, QPixmap, file object or bytestring
'''
path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg')
base_path = os.path.join(self.library_path, self.path(id,
index_is_id=True))
if not os.path.exists(base_path):
self.set_path(id, index_is_id=True)
base_path = os.path.join(self.library_path, self.path(id,
index_is_id=True))
self.dirtied([id])
path = os.path.join(base_path, 'cover.jpg')
if callable(getattr(data, 'save', None)):
data.save(path)
else:
@ -2080,7 +2089,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return 1.0
series_indices = [x[0] for x in series_indices]
if tweaks['series_index_auto_increment'] == 'next':
return series_indices[-1] + 1
return floor(series_indices[-1]) + 1
if tweaks['series_index_auto_increment'] == 'first_free':
for i in range(1, 10000):
if i not in series_indices:

View File

@ -42,7 +42,7 @@ class Restore(Thread):
self.src_library_path = os.path.abspath(library_path)
self.progress_callback = progress_callback
self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')
self.bad_ext_pat = re.compile(r'[^a-z0-9]+')
self.bad_ext_pat = re.compile(r'[^a-z0-9_]+')
if not callable(self.progress_callback):
self.progress_callback = lambda x, y: x
self.dirs = []

View File

@ -22,13 +22,18 @@ def available_translations():
_available_translations = [x for x in stats if stats[x] > 0.1]
return _available_translations
def get_lang():
'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs
lang = prefs['language']
lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
if lang:
return lang
def get_system_locale():
from calibre.constants import iswindows
lang = None
if iswindows:
try:
from calibre.constants import get_windows_user_locale_name
lang = get_windows_user_locale_name()
lang = lang.strip()
if not lang: lang = None
except:
pass # Windows XP does not have the GetUserDefaultLocaleName fn
if lang is None:
try:
lang = locale.getdefaultlocale(['LANGUAGE', 'LC_ALL', 'LC_CTYPE',
'LC_MESSAGES', 'LANG'])[0]
@ -39,6 +44,25 @@ def get_lang():
lang = os.environ['LANG']
except:
pass
if lang:
lang = lang.replace('-', '_')
lang = '_'.join(lang.split('_')[:2])
return lang
def get_lang():
'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs
lang = prefs['language']
lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
if lang:
return lang
try:
lang = get_system_locale()
except:
import traceback
traceback.print_exc()
lang = None
if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match:
@ -55,7 +79,7 @@ def get_lc_messages_path(lang):
if lang in available_translations():
hlang = lang
else:
xlang = lang.split('_')[0]
xlang = lang.split('_')[0].lower()
if xlang in available_translations():
hlang = xlang
return hlang