Merge from trunk

This commit is contained in:
Charles Haley 2012-12-06 10:14:36 +01:00
commit 4a4a540699
124 changed files with 54179 additions and 49303 deletions

View File

@ -20,6 +20,7 @@ class Aksiyon (BasicNewsRecipe):
auto_cleanup = True auto_cleanup = True
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg' cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg' masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
ignore_duplicate_articles = { 'title', 'url' }
remove_empty_feeds= True remove_empty_feeds= True
feeds = [ feeds = [
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'), ( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),

View File

@ -21,10 +21,11 @@ class Engadget(BasicNewsRecipe):
use_embedded_content = False use_embedded_content = False
remove_javascript = True remove_javascript = True
remove_empty_feeds = True remove_empty_feeds = True
auto_cleanup = True
keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})] #keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})] #remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})] #remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')] feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]

View File

@ -6,22 +6,41 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
nytimes.com nytimes.com
''' '''
import re, string, time import re, string, time
from calibre import entity_to_unicode, strftime from calibre import strftime
from datetime import timedelta, date from datetime import timedelta, date
from time import sleep
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe): class NYTimes(BasicNewsRecipe):
recursions=1 # set this to zero to omit Related articles lists
# set getTechBlogs to True to include the technology blogs
# set tech_oldest_article to control article age
# set tech_max_articles_per_feed to control article count
getTechBlogs = True
remove_empty_feeds = True
tech_oldest_article = 14
tech_max_articles_per_feed = 25
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored. # set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = True headlinesOnly = True
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the # set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles # number of days old an article can be for inclusion. If oldest_web_article = None all articles
# will be included. Note: oldest_article is ignored if webEdition = False # will be included. Note: oldest_web_article is ignored if webEdition = False
webEdition = False webEdition = False
oldest_article = 7 oldest_web_article = 7
# download higher resolution images than the small thumbnails typically included in the article
# the downside of having large beautiful images is that the file size is much larger, on the order of 7MB per paper
useHighResImages = True
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
replaceKindleVersion = False
# includeSections: List of sections to include. If empty, all sections found will be included. # includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example, # Otherwise, only the sections named will be included. For example,
@ -82,57 +101,68 @@ class NYTimes(BasicNewsRecipe):
('Education',u'education'), ('Education',u'education'),
('Multimedia',u'multimedia'), ('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'), (u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'), (u'Sunday Magazine',u'magazine')
(u'Week in Review',u'weekinreview')] ]
tech_feeds = [
(u'Tech - Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
(u'Tech - Bits', u'http://bits.blogs.nytimes.com/feed/'),
(u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
]
if headlinesOnly: if headlinesOnly:
title='New York Times Headlines' title='New York Times Headlines'
description = 'Headlines from the New York Times. Needs a subscription from http://www.nytimes.com' description = 'Headlines from the New York Times'
needs_subscription = 'optional' needs_subscription = False
elif webEdition: elif webEdition:
title='New York Times (Web)' title='New York Times (Web)'
description = 'New York Times on the Web' description = 'New York Times on the Web'
needs_subscription = True needs_subscription = False
elif replaceKindleVersion:
title='The New York Times'
description = 'Today\'s New York Times'
needs_subscription = False
else: else:
title='New York Times' title='New York Times'
description = 'Today\'s New York Times' description = 'Today\'s New York Times'
needs_subscription = True needs_subscription = False
def decode_url_date(self,url):
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december'] urlitems = url.split('/')
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
try: try:
m = self.month_list.index(udate[0])+1 d = date(int(urlitems[3]),int(urlitems[4]),int(urlitems[5]))
except: except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try: try:
d = date(y,m,d) d = date(int(urlitems[4]),int(urlitems[5]),int(urlitems[6]))
except: except:
d = date.today return None
return d return d
earliest_date = date.today() - timedelta(days=oldest_article) if oldest_web_article is None:
earliest_date = date.today()
else:
earliest_date = date.today() - timedelta(days=oldest_web_article)
oldest_article = 365 # by default, a long time ago
__author__ = 'GRiker/Kovid Goyal/Nick Redding' __author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en' language = 'en'
requires_version = (0, 7, 5) requires_version = (0, 7, 5)
encoding = 'utf-8'
timefmt = '' timefmt = ''
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
simultaneous_downloads = 1
cover_margins = (18,18,'grey99') cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article') remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article') remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':[ remove_tags = [
dict(attrs={'class':[
'articleFooter', 'articleFooter',
'articleTools', 'articleTools',
'columnGroup doubleRule',
'columnGroup singleRule', 'columnGroup singleRule',
'columnGroup last', 'columnGroup last',
'columnGroup last', 'columnGroup last',
@ -140,7 +170,6 @@ class NYTimes(BasicNewsRecipe):
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module', 'entry-response module',
'icon enlargeThis',
'leftNavTabs', 'leftNavTabs',
'metaFootnote', 'metaFootnote',
'module box nav', 'module box nav',
@ -150,10 +179,43 @@ class NYTimes(BasicNewsRecipe):
'relatedSearchesModule', 'relatedSearchesModule',
'side_tool', 'side_tool',
'singleAd', 'singleAd',
'entry entry-utility', #added for DealBook
'entry-tags', #added for DealBook
'footer promos clearfix', #added for DealBook
'footer links clearfix', #added for DealBook
'tabsContainer', #added for other blog downloads
'column lastColumn', #added for other blog downloads
'pageHeaderWithLabel', #added for other gadgetwise downloads
'column two', #added for other blog downloads
'column two last', #added for other blog downloads
'column three', #added for other blog downloads
'column three last', #added for other blog downloads
'column four',#added for other blog downloads
'column four last',#added for other blog downloads
'column last', #added for other blog downloads
'entry entry-related',
'subNavigation tabContent active', #caucus blog navigation
'mediaOverlay slideshow',
'wideThumb',
'video', #added 02-11-2011
'videoHeader',#added 02-11-2011
'articleInlineVideoHolder', #added 02-11-2011
'assetCompanionAd',
re.compile('^subNavigation'), re.compile('^subNavigation'),
re.compile('^leaderboard'), re.compile('^leaderboard'),
re.compile('^module'), re.compile('^module'),
re.compile('commentCount')
]}), ]}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
dict(name='div', attrs={'class':'tweet'}),
dict(name='span', attrs={'class':'commentCount meta'}),
dict(name='div', attrs={'id':'header'}),
dict(name='div', attrs={'id':re.compile('commentsContainer')}), # bits, pogue, gadgetwise, open
dict(name='ul', attrs={'class':re.compile('entry-tools')}), # pogue, gadgetwise
dict(name='div', attrs={'class':re.compile('nocontent')}), # pogue, gadgetwise
dict(name='div', attrs={'id':re.compile('respond')}), # open
dict(name='div', attrs={'class':re.compile('entry-tags')}), # pogue
dict(id=[ dict(id=[
'adxLeaderboard', 'adxLeaderboard',
'adxSponLink', 'adxSponLink',
@ -183,22 +245,29 @@ class NYTimes(BasicNewsRecipe):
'side_index', 'side_index',
'side_tool', 'side_tool',
'toolsRight', 'toolsRight',
'skybox', #added for DealBook
'TopAd', #added for DealBook
'related-content', #added for DealBook
]), ]),
dict(name=['script', 'noscript', 'style','form','hr'])] dict(name=['script', 'noscript', 'style','form','hr'])]
no_stylesheets = True no_stylesheets = True
extra_css = ''' extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; } .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } .byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { text-align: left; font-size: small; } .timestamp { font-weight: normal; text-align: left; font-size: 50%; }
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; } a:link {text-decoration: none; }
.date{font-size: 50%; }
.update{font-size: 50%; }
.articleBody { } .articleBody { }
.authorId {text-align: left; } .authorId {text-align: left; font-size: 50%; }
.image {text-align: center;} .image {text-align: center;}
.source {text-align: left; }''' .aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;}
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
.source {text-align: left; font-size: x-small; }'''
articles = {} articles = {}
@ -237,7 +306,7 @@ class NYTimes(BasicNewsRecipe):
def exclude_url(self,url): def exclude_url(self,url):
if not url.startswith("http"): if not url.startswith("http"):
return True return True
if not url.endswith(".html"): if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
return True return True
if 'nytimes.com' not in url: if 'nytimes.com' not in url:
return True return True
@ -280,88 +349,91 @@ class NYTimes(BasicNewsRecipe):
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.nytimes.com/auth/login')
br.form = br.forms().next()
br['userid'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'Please try again' in raw:
raise Exception('Your username and password are incorrect')
return br return br
def skip_ad_pages(self, soup): ## This doesn't work (and probably never did). It either gets another serve of the advertisement,
# Skip ad pages served before actual article ## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
skip_tag = soup.find(True, {'name':'skip'}) ##
if skip_tag is not None: ## def skip_ad_pages(self, soup):
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) ## # Skip ad pages served before actual article
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) ## skip_tag = soup.find(True, {'name':'skip'})
url += '?pagewanted=all' ## if skip_tag is not None:
self.log.warn("Skipping ad to article at '%s'" % url) ## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
return self.index_to_soup(url, raw=True) ## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
## url += '?pagewanted=all'
## self.log.warn("Skipping ad to article at '%s'" % url)
## return self.index_to_soup(url, raw=True)
cover_tag = 'NY_NYT'
def get_cover_url(self): def get_cover_url(self):
cover = None from datetime import timedelta, date
st = time.localtime() cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
year = str(st.tm_year) br = BasicNewsRecipe.get_browser()
month = "%.2d" % st.tm_mon daysback=1
day = "%.2d" % st.tm_mday try:
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg' br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
try: try:
br.open(cover) br.open(cover)
except: except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable") self.log("\nCover unavailable")
cover = None cover = None
return cover return cover
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
def short_title(self): def short_title(self):
return self.title return self.title
def index_to_soup(self, url_or_raw, raw=False):
''' def article_to_soup(self, url_or_raw, raw=False):
OVERRIDE of class method from contextlib import closing
deals with various page encodings between index and articles import copy
''' from calibre.ebooks.chardet import xml_to_unicode
def get_the_soup(docEncoding, url_or_raw, raw=False) :
if re.match(r'\w+://', url_or_raw): if re.match(r'\w+://', url_or_raw):
br = self.clone_browser(self.browser) br = self.clone_browser(self.browser)
f = br.open_novisit(url_or_raw) open_func = getattr(br, 'open_novisit', br.open)
with closing(open_func(url_or_raw)) as f:
_raw = f.read() _raw = f.read()
f.close()
if not _raw: if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw) raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else: else:
_raw = url_or_raw _raw = url_or_raw
if raw: if raw:
return _raw return _raw
if not isinstance(_raw, unicode) and self.encoding: if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace') if callable(self.encoding):
massage = list(BeautifulSoup.MARKUP_MASSAGE) _raw = self.encoding(_raw)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding))) else:
return BeautifulSoup(_raw, markupMassage=massage) _raw = _raw.decode(self.encoding, 'replace')
# Entry point nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
soup = get_the_soup( self.encoding, url_or_raw ) nmassage.extend(self.preprocess_regexps)
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'}) nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')]
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')] # Some websites have buggy doctype declarations that mess up beautifulsoup
if docEncoding == '' : # Remove comments as they can leave detritus when extracting tags leaves
docEncoding = self.encoding # multiple nested comments
nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0]
usrc = self.preprocess_raw_html(usrc, url_or_raw)
return BeautifulSoup(usrc, markupMassage=nmassage)
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
return soup
def massageNCXText(self, description): def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters # Kindle TOC descriptions won't render certain characters
if description: if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&' # Replace '&' with '&'
massaged = re.sub("&","&", massaged) massaged = re.sub("&#038;","&", massaged)
massaged = re.sub("&amp;","&", massaged)
return self.fixChars(massaged) return self.fixChars(massaged)
else: else:
return description return description
@ -383,6 +455,16 @@ class NYTimes(BasicNewsRecipe):
if self.filterDuplicates: if self.filterDuplicates:
if url in self.url_list: if url in self.url_list:
return return
if self.webEdition:
date_tag = self.decode_url_date(url)
if date_tag is not None:
if self.oldest_web_article is not None:
if date_tag < self.earliest_date:
self.log("Skipping article %s" % url)
return
else:
self.log("Skipping article %s" % url)
return
self.url_list.append(url) self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip() title = self.tag_to_string(a, use_alt=True).strip()
description = '' description = ''
@ -407,6 +489,31 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author, description=description, author=author,
content='')) content=''))
def get_tech_feeds(self,ans):
if self.getTechBlogs:
tech_articles = {}
key_list = []
save_oldest_article = self.oldest_article
save_max_articles_per_feed = self.max_articles_per_feed
self.oldest_article = self.tech_oldest_article
self.max_articles_per_feed = self.tech_max_articles_per_feed
self.feeds = self.tech_feeds
tech = self.parse_feeds()
self.oldest_article = save_oldest_article
self.max_articles_per_feed = save_max_articles_per_feed
self.feeds = None
for f in tech:
key_list.append(f.title)
tech_articles[f.title] = []
for a in f.articles:
tech_articles[f.title].append(
dict(title=a.title, url=a.url, date=a.date,
description=a.summary, author=a.author,
content=a.content))
tech_ans = [(k, tech_articles[k]) for k in key_list if tech_articles.has_key(k)]
for x in tech_ans:
ans.append(x)
return ans
def parse_web_edition(self): def parse_web_edition(self):
@ -418,31 +525,41 @@ class NYTimes(BasicNewsRecipe):
if sec_title in self.excludeSections: if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title print "SECTION EXCLUDED: ",sec_title
continue continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html' try:
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html') soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
except:
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
self.key = sec_title self.key = sec_title
# Find each article # Find each article
for div in soup.findAll(True, for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): attrs={'class':['section-headline', 'ledeStory', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] : if div['class'] in ['story', 'story headline', 'storyHeader'] :
self.handle_article(div) self.handle_article(div)
elif div['class'] == 'ledeStory':
divsub = div.find('div','storyHeader')
if divsub is not None:
self.handle_article(divsub)
ulrefer = div.find('ul','refer')
if ulrefer is not None:
for lidiv in ulrefer.findAll('li'):
self.handle_article(lidiv)
elif div['class'] == 'headlinesOnly multiline flush': elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'): for lidiv in div.findAll('li'):
self.handle_article(lidiv) self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_todays_index(self): def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False skipping = False
# Find each article # Find each article
for div in soup.findAll(True, for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']: if div['class'] in ['section-headline','sectionHeader']:
self.key = string.capwords(self.feed_title(div)) self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed') self.key = self.key.replace('Op-ed','Op-Ed')
@ -466,7 +583,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv) self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_headline_index(self): def parse_headline_index(self):
@ -514,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
for h3_item in search_div.findAll('h3'): for h3_item in search_div.findAll('h3'):
byline = h3_item.h6 byline = h3_item.h6
if byline is not None: if byline is not None:
author = self.tag_to_string(byline,usa_alt=False) author = self.tag_to_string(byline,use_alt=False)
else: else:
author = '' author = ''
a = h3_item.find('a', href=True) a = h3_item.find('a', href=True)
@ -540,7 +657,7 @@ class NYTimes(BasicNewsRecipe):
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_index(self): def parse_index(self):
if self.headlinesOnly: if self.headlinesOnly:
@ -550,32 +667,190 @@ class NYTimes(BasicNewsRecipe):
else: else:
return self.parse_todays_index() return self.parse_todays_index()
def strip_anchors(self,soup): def strip_anchors(self,soup,kill_all=False):
paras = soup.findAll(True) paras = soup.findAll(True)
for para in paras: for para in paras:
aTags = para.findAll('a') aTags = para.findAll('a')
for a in aTags: for a in aTags:
if a.img is None: if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace')) if kill_all or (self.recursions==0):
a.replaceWith(self.tag_to_string(a,False))
else:
if a.has_key('href'):
if a['href'].startswith('http://www.nytimes'):
if not a['href'].endswith('pagewanted=all'):
url = re.sub(r'\?.*', '', a['href'])
if self.exclude_url(url):
a.replaceWith(self.tag_to_string(a,False))
else:
a['href'] = url+'?pagewanted=all'
elif not (a['href'].startswith('http://pogue') or \
a['href'].startswith('http://bits') or \
a['href'].startswith('http://travel') or \
a['href'].startswith('http://business') or \
a['href'].startswith('http://tech') or \
a['href'].startswith('http://health') or \
a['href'].startswith('http://dealbook') or \
a['href'].startswith('http://open')):
a.replaceWith(self.tag_to_string(a,False))
return soup
def handle_tags(self,soup):
try:
print("HANDLE TAGS: TITLE = "+self.tag_to_string(soup.title))
except:
print("HANDLE TAGS: NO TITLE")
if soup is None:
print("ERROR: handle_tags received NoneType")
return None
## print("HANDLING AD FORWARD:")
## print(soup)
if self.keep_only_tags:
body = Tag(soup, 'body')
try:
if isinstance(self.keep_only_tags, dict):
self.keep_only_tags = [self.keep_only_tags]
for spec in self.keep_only_tags:
for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body)
except AttributeError: # soup has no body element
pass
def remove_beyond(tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
if self.remove_tags_after is not None:
rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after
for spec in rt:
tag = soup.find(**spec)
remove_beyond(tag, 'nextSibling')
if self.remove_tags_before is not None:
tag = soup.find(**self.remove_tags_before)
remove_beyond(tag, 'previousSibling')
for kwds in self.remove_tags:
for tag in soup.findAll(**kwds):
tag.extract()
return soup return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
skip_tag = soup.find(True, {'name':'skip'})
if skip_tag is not None:
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
url += '?pagewanted=all'
self.log.warn("Skipping ad to article at '%s'" % url)
sleep(5)
soup = self.handle_tags(self.article_to_soup(url))
if self.webEdition & (self.oldest_article>0): # check if the article is from one of the tech blogs
date_tag = soup.find(True,attrs={'class': ['dateline','date']}) blog=soup.find('div',attrs={'id':['pogue','bits','gadgetwise','open']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False) if blog is not None:
date_str = date_str.replace('Published:','') old_body = soup.find('body')
date_items = date_str.split(',') new_body=Tag(soup,'body')
try: new_body.append(soup.find('div',attrs={'id':'content'}))
datestring = date_items[0]+' '+date_items[1] new_body.find('div',attrs={'id':'content'})['id']='blogcontent' # identify for postprocess_html
article_date = self.decode_us_date(datestring) old_body.replaceWith(new_body)
except: for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
article_date = date.today() if divr.find(text=re.compile('Sign up')):
if article_date < self.earliest_date: divr.extract()
self.log("Skipping article dated %s" % date_str) divr = soup.find('div',attrs={'id':re.compile('related-content')})
return None if divr is not None:
# handle related articles
rlist = []
ul = divr.find('ul')
if ul is not None:
for li in ul.findAll('li'):
atag = li.find('a')
if atag is not None:
if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \
atag['href'].startswith('http://open'):
atag.find(text=True).replaceWith(self.massageNCXText(self.tag_to_string(atag,False)))
rlist.append(atag)
divr.extract()
if rlist != []:
asidediv = Tag(soup,'div',[('class','aside')])
if soup.find('hr') is None:
asidediv.append(Tag(soup,'hr'))
h4 = Tag(soup,'h4',[('class','asidenote')])
h4.insert(0,"Related Posts")
asidediv.append(h4)
ul = Tag(soup,'ul')
for r in rlist:
li = Tag(soup,'li',[('class','aside')])
r['class'] = 'aside'
li.append(r)
ul.append(li)
asidediv.append(ul)
asidediv.append(Tag(soup,'hr'))
smain = soup.find('body')
smain.append(asidediv)
for atag in soup.findAll('a'):
img = atag.find('img')
if img is not None:
atag.replaceWith(img)
elif not atag.has_key('href'):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
hdr = soup.find('address')
if hdr is not None:
hdr.name='span'
for span_credit in soup.findAll('span','credit'):
sp = Tag(soup,'span')
span_credit.replaceWith(sp)
sp.append(Tag(soup,'br'))
sp.append(span_credit)
sp.append(Tag(soup,'br'))
else: # nytimes article
related = [] # these will be the related articles
first_outer = None # first related outer tag
first_related = None # first related tag
for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
for rdiv in soup.findAll('div','columnGroup doubleRule'):
if rdiv.find('h3') is not None:
if self.tag_to_string(rdiv.h3,False).startswith('Related'):
rdiv.h3.find(text=True).replaceWith("Related articles")
rdiv.h3['class'] = 'asidenote'
for litag in rdiv.findAll('li'):
if litag.find('a') is not None:
if litag.find('a')['href'].startswith('http://www.nytimes.com'):
url = re.sub(r'\?.*', '', litag.find('a')['href'])
litag.find('a')['href'] = url+'?pagewanted=all'
litag.extract()
related.append(litag)
if first_related is None:
first_related = rdiv
first_outer = outerdiv
else:
litag.extract()
if related != []:
for r in related:
if r.h6: # don't want the anchor inside a h6 tag
r.h6.replaceWith(r.h6.a)
first_related.ul.append(r)
first_related.insert(0,Tag(soup,'hr'))
first_related.append(Tag(soup,'hr'))
first_related['class'] = 'aside'
first_outer.replaceWith(first_related) # replace the outer tag with the related tag
for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
rdiv.extract()
kicker_tag = soup.find(attrs={'class':'kicker'}) kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots if kicker_tag: # remove Op_Ed author head shots
@ -584,9 +859,77 @@ class NYTimes(BasicNewsRecipe):
img_div = soup.find('div','inlineImage module') img_div = soup.find('div','inlineImage module')
if img_div: if img_div:
img_div.extract() img_div.extract()
return self.strip_anchors(soup)
def postprocess_html(self,soup, True): if self.useHighResImages:
try:
#open up all the "Enlarge this Image" pop-ups and download the full resolution jpegs
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
if enlargeThisList:
for popupref in enlargeThisList:
popupreflink = popupref.find('a')
if popupreflink:
reflinkstring = str(popupreflink['href'])
refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
refend = reflinkstring.find(".html", refstart) + len(".html")
reflinkstring = reflinkstring[refstart:refend]
popuppage = self.browser.open(reflinkstring)
popuphtml = popuppage.read()
popuppage.close()
if popuphtml:
st = time.localtime()
year = str(st.tm_year)
month = "%.2d" % st.tm_mon
day = "%.2d" % st.tm_mday
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
popupSoup = BeautifulSoup(popuphtml)
highResTag = popupSoup.find('img', {'src':highResImageLink})
if highResTag:
try:
newWidth = highResTag['width']
newHeight = highResTag['height']
imageTag = popupref.parent.find("img")
except:
self.log("Error: finding width and height of img")
popupref.extract()
if imageTag:
try:
imageTag['src'] = highResImageLink
imageTag['width'] = newWidth
imageTag['height'] = newHeight
except:
self.log("Error setting the src width and height parameters")
except Exception:
self.log("Error pulling high resolution images")
try:
#in case pulling images failed, delete the enlarge this text
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
if enlargeThisList:
for popupref in enlargeThisList:
popupref.extract()
except:
self.log("Error removing Enlarge this text")
return self.strip_anchors(soup,False)
def postprocess_html(self,soup,first_fetch):
if not first_fetch: # remove Related links
for aside in soup.findAll('div','aside'):
aside.extract()
soup = self.strip_anchors(soup,True)
if soup.find('div',attrs={'id':'blogcontent'}) is None:
if first_fetch:
aside = soup.find('div','aside')
if aside is not None: # move the related list to the end of the article
art = soup.find('div',attrs={'id':'article'})
if art is None:
art = soup.find('div',attrs={'class':'article'})
if art is not None:
art.append(aside)
try: try:
if self.one_picture_per_article: if self.one_picture_per_article:
# Remove all images after first # Remove all images after first
@ -642,6 +985,7 @@ class NYTimes(BasicNewsRecipe):
try: try:
# Change <nyt_headline> to <h2> # Change <nyt_headline> to <h2>
h1 = soup.find('h1') h1 = soup.find('h1')
blogheadline = str(h1) #added for dealbook
if h1: if h1:
headline = h1.find("nyt_headline") headline = h1.find("nyt_headline")
if headline: if headline:
@ -649,13 +993,19 @@ class NYTimes(BasicNewsRecipe):
tag['class'] = "headline" tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0])) tag.insert(0, self.fixChars(headline.contents[0]))
h1.replaceWith(tag) h1.replaceWith(tag)
elif blogheadline.find('entry-title'):#added for dealbook
tag = Tag(soup, "h2")#added for dealbook
tag['class'] = "headline"#added for dealbook
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
h1.replaceWith(tag)#added for dealbook
else: else:
# Blog entry - replace headline, remove <hr> tags # Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
headline = soup.find('title') headline = soup.find('title')
if headline: if headline:
tag = Tag(soup, "h2") tag = Tag(soup, "h2")
tag['class'] = "headline" tag['class'] = "headline"
tag.insert(0, self.fixChars(headline.contents[0])) tag.insert(0, self.fixChars(headline.renderContents()))
soup.insert(0, tag) soup.insert(0, tag)
hrs = soup.findAll('hr') hrs = soup.findAll('hr')
for hr in hrs: for hr in hrs:
@ -663,6 +1013,29 @@ class NYTimes(BasicNewsRecipe):
except: except:
self.log("ERROR: Problem in Change <nyt_headline> to <h2>") self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
try:
#if this is from a blog (dealbook, fix the byline format
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
if bylineauthor:
tag = Tag(soup, "h6")
tag['class'] = "byline"
tag.insert(0, self.fixChars(bylineauthor.renderContents()))
bylineauthor.replaceWith(tag)
except:
self.log("ERROR: fixing byline author format")
try:
#if this is a blog (dealbook) fix the credit style for the pictures
blogcredit = soup.find('div',attrs={'class':'credit'})
if blogcredit:
tag = Tag(soup, "h6")
tag['class'] = "credit"
tag.insert(0, self.fixChars(blogcredit.renderContents()))
blogcredit.replaceWith(tag)
except:
self.log("ERROR: fixing credit format")
try: try:
# Change <h1> to <h3> - used in editorial blogs # Change <h1> to <h3> - used in editorial blogs
masthead = soup.find("h1") masthead = soup.find("h1")
@ -685,6 +1058,13 @@ class NYTimes(BasicNewsRecipe):
subhead.replaceWith(bTag) subhead.replaceWith(bTag)
except: except:
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs") self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
try:
#remove the <strong> update tag
blogupdated = soup.find('span', {'class':'update'})
if blogupdated:
blogupdated.replaceWith("")
except:
self.log("ERROR: Removing strong tag")
try: try:
divTag = soup.find('div',attrs={'id':'articleBody'}) divTag = soup.find('div',attrs={'id':'articleBody'})
@ -708,16 +1088,16 @@ class NYTimes(BasicNewsRecipe):
return soup return soup
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'): if not first:
return
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'}) idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
if idxdiv is not None: if idxdiv is not None:
if idxdiv.img: if idxdiv.img:
self.add_toc_thumbnail(article, idxdiv.img['src']) self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',idxdiv.img['src']))
else: else:
img = soup.find('img') img = soup.find('body').find('img')
if img is not None: if img is not None:
self.add_toc_thumbnail(article, img['src']) self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',img['src']))
shortparagraph = "" shortparagraph = ""
try: try:
if len(article.text_summary.strip()) == 0: if len(article.text_summary.strip()) == 0:
@ -731,13 +1111,22 @@ class NYTimes(BasicNewsRecipe):
#account for blank paragraphs and short paragraphs by appending them to longer ones #account for blank paragraphs and short paragraphs by appending them to longer ones
if len(refparagraph) > 0: if len(refparagraph) > 0:
if len(refparagraph) > 70: #approximately one line of text if len(refparagraph) > 70: #approximately one line of text
article.summary = article.text_summary = shortparagraph + refparagraph newpara = shortparagraph + refparagraph
newparaDateline,newparaEm,newparaDesc = newpara.partition('&mdash;')
if newparaEm == '':
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
if newparaEm == '':
newparaDesc = newparaDateline
article.summary = article.text_summary = newparaDesc.strip()
return return
else: else:
shortparagraph = refparagraph + " " shortparagraph = refparagraph + " "
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"): if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
shortparagraph = shortparagraph + "- " shortparagraph = shortparagraph + "- "
else:
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
except: except:
self.log("Error creating article descriptions") self.log("Error creating article descriptions")
return return

View File

@ -6,31 +6,42 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
nytimes.com nytimes.com
''' '''
import re, string, time import re, string, time
from calibre import entity_to_unicode, strftime from calibre import strftime
from datetime import timedelta, date from datetime import timedelta, date
from time import sleep
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe): class NYTimes(BasicNewsRecipe):
recursions=1 # set this to zero to omit Related articles lists
# set getTechBlogs to True to include the technology blogs
# set tech_oldest_article to control article age
# set tech_max_articles_per_feed to control article count
getTechBlogs = True
remove_empty_feeds = True
tech_oldest_article = 14
tech_max_articles_per_feed = 25
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored. # set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = False headlinesOnly = False
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the # set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles # number of days old an article can be for inclusion. If oldest_web_article = None all articles
# will be included. Note: oldest_article is ignored if webEdition = False # will be included. Note: oldest_web_article is ignored if webEdition = False
webEdition = False webEdition = False
oldest_article = 7 oldest_web_article = 7
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
replaceKindleVersion = False
# download higher resolution images than the small thumbnails typically included in the article # download higher resolution images than the small thumbnails typically included in the article
# the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper # the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
useHighResImages = True useHighResImages = True
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle # previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
replaceKindleVersion = False
# includeSections: List of sections to include. If empty, all sections found will be included. # includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example, # Otherwise, only the sections named will be included. For example,
# #
@ -90,60 +101,68 @@ class NYTimes(BasicNewsRecipe):
('Education',u'education'), ('Education',u'education'),
('Multimedia',u'multimedia'), ('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'), (u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'), (u'Sunday Magazine',u'magazine')
(u'Week in Review',u'weekinreview')] ]
tech_feeds = [
(u'Tech - Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
(u'Tech - Bits', u'http://bits.blogs.nytimes.com/feed/'),
(u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
]
if headlinesOnly: if headlinesOnly:
title='New York Times Headlines' title='New York Times Headlines'
description = 'Headlines from the New York Times' description = 'Headlines from the New York Times'
needs_subscription = True needs_subscription = False
elif webEdition: elif webEdition:
title='New York Times (Web)' title='New York Times (Web)'
description = 'New York Times on the Web' description = 'New York Times on the Web'
needs_subscription = True needs_subscription = False
elif replaceKindleVersion: elif replaceKindleVersion:
title='The New York Times' title='The New York Times'
description = 'Today\'s New York Times' description = 'Today\'s New York Times'
needs_subscription = True needs_subscription = False
else: else:
title='New York Times' title='New York Times'
description = 'Today\'s New York Times. Needs subscription from http://www.nytimes.com' description = 'Today\'s New York Times'
needs_subscription = True needs_subscription = False
def decode_url_date(self,url):
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december'] urlitems = url.split('/')
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
try: try:
m = self.month_list.index(udate[0])+1 d = date(int(urlitems[3]),int(urlitems[4]),int(urlitems[5]))
except: except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try: try:
d = date(y,m,d) d = date(int(urlitems[4]),int(urlitems[5]),int(urlitems[6]))
except: except:
d = date.today return None
return d return d
earliest_date = date.today() - timedelta(days=oldest_article) if oldest_web_article is None:
earliest_date = date.today()
else:
earliest_date = date.today() - timedelta(days=oldest_web_article)
oldest_article = 365 # by default, a long time ago
__author__ = 'GRiker/Kovid Goyal/Nick Redding/Ben Collier' __author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en' language = 'en'
requires_version = (0, 7, 5) requires_version = (0, 7, 5)
encoding = 'utf-8'
timefmt = '' timefmt = ''
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
simultaneous_downloads = 1
cover_margins = (18,18,'grey99') cover_margins = (18,18,'grey99')
remove_tags_before = dict(id='article') remove_tags_before = dict(id='article')
remove_tags_after = dict(id='article') remove_tags_after = dict(id='article')
remove_tags = [dict(attrs={'class':[ remove_tags = [
dict(attrs={'class':[
'articleFooter', 'articleFooter',
'articleTools', 'articleTools',
'columnGroup doubleRule',
'columnGroup singleRule', 'columnGroup singleRule',
'columnGroup last', 'columnGroup last',
'columnGroup last', 'columnGroup last',
@ -151,7 +170,6 @@ class NYTimes(BasicNewsRecipe):
'dottedLine', 'dottedLine',
'entry-meta', 'entry-meta',
'entry-response module', 'entry-response module',
#'icon enlargeThis', #removed to provide option for high res images
'leftNavTabs', 'leftNavTabs',
'metaFootnote', 'metaFootnote',
'module box nav', 'module box nav',
@ -175,12 +193,9 @@ class NYTimes(BasicNewsRecipe):
'column four',#added for other blog downloads 'column four',#added for other blog downloads
'column four last',#added for other blog downloads 'column four last',#added for other blog downloads
'column last', #added for other blog downloads 'column last', #added for other blog downloads
'timestamp published', #added for other blog downloads
'entry entry-related', 'entry entry-related',
'subNavigation tabContent active', #caucus blog navigation 'subNavigation tabContent active', #caucus blog navigation
'columnGroup doubleRule',
'mediaOverlay slideshow', 'mediaOverlay slideshow',
'headlinesOnly multiline flush',
'wideThumb', 'wideThumb',
'video', #added 02-11-2011 'video', #added 02-11-2011
'videoHeader',#added 02-11-2011 'videoHeader',#added 02-11-2011
@ -189,7 +204,18 @@ class NYTimes(BasicNewsRecipe):
re.compile('^subNavigation'), re.compile('^subNavigation'),
re.compile('^leaderboard'), re.compile('^leaderboard'),
re.compile('^module'), re.compile('^module'),
re.compile('commentCount')
]}), ]}),
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
dict(name='div', attrs={'class':'tweet'}),
dict(name='span', attrs={'class':'commentCount meta'}),
dict(name='div', attrs={'id':'header'}),
dict(name='div', attrs={'id':re.compile('commentsContainer')}), # bits, pogue, gadgetwise, open
dict(name='ul', attrs={'class':re.compile('entry-tools')}), # pogue, gadgetwise
dict(name='div', attrs={'class':re.compile('nocontent')}), # pogue, gadgetwise
dict(name='div', attrs={'id':re.compile('respond')}), # open
dict(name='div', attrs={'class':re.compile('entry-tags')}), # pogue
dict(id=[ dict(id=[
'adxLeaderboard', 'adxLeaderboard',
'adxSponLink', 'adxSponLink',
@ -227,17 +253,21 @@ class NYTimes(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
extra_css = ''' extra_css = '''
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; } .articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; } .byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
.timestamp { text-align: left; font-size: small; } .timestamp { font-weight: normal; text-align: left; font-size: 50%; }
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; } .caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
a:link {text-decoration: none; } a:link {text-decoration: none; }
.date{font-size: 50%; }
.update{font-size: 50%; }
.articleBody { } .articleBody { }
.authorId {text-align: left; } .authorId {text-align: left; font-size: 50%; }
.image {text-align: center;} .image {text-align: center;}
.source {text-align: left; }''' .aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;}
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
.source {text-align: left; font-size: x-small; }'''
articles = {} articles = {}
@ -276,7 +306,7 @@ class NYTimes(BasicNewsRecipe):
def exclude_url(self,url): def exclude_url(self,url):
if not url.startswith("http"): if not url.startswith("http"):
return True return True
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url and 'blogs.nytimes.com' not in url: #added for DealBook if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
return True return True
if 'nytimes.com' not in url: if 'nytimes.com' not in url:
return True return True
@ -319,88 +349,91 @@ class NYTimes(BasicNewsRecipe):
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://www.nytimes.com/auth/login')
br.form = br.forms().next()
br['userid'] = self.username
br['password'] = self.password
raw = br.submit().read()
if 'Please try again' in raw:
raise Exception('Your username and password are incorrect')
return br return br
def skip_ad_pages(self, soup): ## This doesn't work (and probably never did). It either gets another serve of the advertisement,
# Skip ad pages served before actual article ## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
skip_tag = soup.find(True, {'name':'skip'}) ##
if skip_tag is not None: ## def skip_ad_pages(self, soup):
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href']) ## # Skip ad pages served before actual article
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href']) ## skip_tag = soup.find(True, {'name':'skip'})
url += '?pagewanted=all' ## if skip_tag is not None:
self.log.warn("Skipping ad to article at '%s'" % url) ## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
return self.index_to_soup(url, raw=True) ## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
## url += '?pagewanted=all'
## self.log.warn("Skipping ad to article at '%s'" % url)
## return self.index_to_soup(url, raw=True)
cover_tag = 'NY_NYT'
def get_cover_url(self): def get_cover_url(self):
cover = None from datetime import timedelta, date
st = time.localtime() cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
year = str(st.tm_year) br = BasicNewsRecipe.get_browser()
month = "%.2d" % st.tm_mon daysback=1
day = "%.2d" % st.tm_mday try:
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg' br.open(cover)
except:
while daysback<7:
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
try: try:
br.open(cover) br.open(cover)
except: except:
daysback = daysback+1
continue
break
if daysback==7:
self.log("\nCover unavailable") self.log("\nCover unavailable")
cover = None cover = None
return cover return cover
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
def short_title(self): def short_title(self):
return self.title return self.title
def index_to_soup(self, url_or_raw, raw=False):
''' def article_to_soup(self, url_or_raw, raw=False):
OVERRIDE of class method from contextlib import closing
deals with various page encodings between index and articles import copy
''' from calibre.ebooks.chardet import xml_to_unicode
def get_the_soup(docEncoding, url_or_raw, raw=False) :
if re.match(r'\w+://', url_or_raw): if re.match(r'\w+://', url_or_raw):
br = self.clone_browser(self.browser) br = self.clone_browser(self.browser)
f = br.open_novisit(url_or_raw) open_func = getattr(br, 'open_novisit', br.open)
with closing(open_func(url_or_raw)) as f:
_raw = f.read() _raw = f.read()
f.close()
if not _raw: if not _raw:
raise RuntimeError('Could not fetch index from %s'%url_or_raw) raise RuntimeError('Could not fetch index from %s'%url_or_raw)
else: else:
_raw = url_or_raw _raw = url_or_raw
if raw: if raw:
return _raw return _raw
if not isinstance(_raw, unicode) and self.encoding: if not isinstance(_raw, unicode) and self.encoding:
_raw = _raw.decode(docEncoding, 'replace') if callable(self.encoding):
massage = list(BeautifulSoup.MARKUP_MASSAGE) _raw = self.encoding(_raw)
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding))) else:
return BeautifulSoup(_raw, markupMassage=massage) _raw = _raw.decode(self.encoding, 'replace')
# Entry point nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
soup = get_the_soup( self.encoding, url_or_raw ) nmassage.extend(self.preprocess_regexps)
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'}) nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')]
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')] # Some websites have buggy doctype declarations that mess up beautifulsoup
if docEncoding == '' : # Remove comments as they can leave detritus when extracting tags leaves
docEncoding = self.encoding # multiple nested comments
nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0]
usrc = self.preprocess_raw_html(usrc, url_or_raw)
return BeautifulSoup(usrc, markupMassage=nmassage)
if self.verbose > 2:
self.log( " document encoding: '%s'" % docEncoding)
if docEncoding != self.encoding :
soup = get_the_soup(docEncoding, url_or_raw)
return soup
def massageNCXText(self, description): def massageNCXText(self, description):
# Kindle TOC descriptions won't render certain characters # Kindle TOC descriptions won't render certain characters
if description: if description:
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)) massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
# Replace '&' with '&' # Replace '&' with '&'
massaged = re.sub("&","&", massaged) massaged = re.sub("&#038;","&", massaged)
massaged = re.sub("&amp;","&", massaged)
return self.fixChars(massaged) return self.fixChars(massaged)
else: else:
return description return description
@ -422,6 +455,16 @@ class NYTimes(BasicNewsRecipe):
if self.filterDuplicates: if self.filterDuplicates:
if url in self.url_list: if url in self.url_list:
return return
if self.webEdition:
date_tag = self.decode_url_date(url)
if date_tag is not None:
if self.oldest_web_article is not None:
if date_tag < self.earliest_date:
self.log("Skipping article %s" % url)
return
else:
self.log("Skipping article %s" % url)
return
self.url_list.append(url) self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip() title = self.tag_to_string(a, use_alt=True).strip()
description = '' description = ''
@ -446,6 +489,31 @@ class NYTimes(BasicNewsRecipe):
description=description, author=author, description=description, author=author,
content='')) content=''))
def get_tech_feeds(self, ans):
    """Append the technology-blog feeds to ``ans`` and return it.

    When ``self.getTechBlogs`` is true, ``self.tech_feeds`` is parsed with
    ``self.parse_feeds()`` while ``oldest_article`` and
    ``max_articles_per_feed`` are temporarily replaced by their ``tech_*``
    counterparts.  Every parsed feed contributes one
    ``(feed title, article list)`` tuple, appended to ``ans`` in feed order.
    Each article is a dict with the keys ``title``, ``url``, ``date``,
    ``description``, ``author`` and ``content``.

    ans -- list of ``(section title, article list)`` tuples; extended in place.

    Returns the same ``ans`` object (untouched when ``getTechBlogs`` is false).
    """
    if not self.getTechBlogs:
        return ans

    save_oldest_article = self.oldest_article
    save_max_articles_per_feed = self.max_articles_per_feed
    self.oldest_article = self.tech_oldest_article
    self.max_articles_per_feed = self.tech_max_articles_per_feed
    self.feeds = self.tech_feeds
    try:
        tech = self.parse_feeds()
    finally:
        # Undo the temporary overrides even when parse_feeds() raises, so a
        # failed blog download cannot leak the tech-only limits (or the tech
        # feed list) into the rest of the recipe run.
        self.oldest_article = save_oldest_article
        self.max_articles_per_feed = save_max_articles_per_feed
        self.feeds = None

    # Build one entry per feed directly instead of going through a
    # title-keyed dict: two feeds sharing a title no longer overwrite each
    # other's article list.
    for f in tech:
        feed_articles = [
            dict(title=a.title, url=a.url, date=a.date,
                 description=a.summary, author=a.author,
                 content=a.content)
            for a in f.articles
        ]
        ans.append((f.title, feed_articles))
    return ans
def parse_web_edition(self): def parse_web_edition(self):
@ -457,31 +525,41 @@ class NYTimes(BasicNewsRecipe):
if sec_title in self.excludeSections: if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title print "SECTION EXCLUDED: ",sec_title
continue continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html' try:
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html') soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
except:
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
self.key = sec_title self.key = sec_title
# Find each article # Find each article
for div in soup.findAll(True, for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): attrs={'class':['section-headline', 'ledeStory', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] : if div['class'] in ['story', 'story headline', 'storyHeader'] :
self.handle_article(div) self.handle_article(div)
elif div['class'] == 'ledeStory':
divsub = div.find('div','storyHeader')
if divsub is not None:
self.handle_article(divsub)
ulrefer = div.find('ul','refer')
if ulrefer is not None:
for lidiv in ulrefer.findAll('li'):
self.handle_article(lidiv)
elif div['class'] == 'headlinesOnly multiline flush': elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'): for lidiv in div.findAll('li'):
self.handle_article(lidiv) self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_todays_index(self): def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html') soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False skipping = False
# Find each article # Find each article
for div in soup.findAll(True, for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}): attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']: if div['class'] in ['section-headline','sectionHeader']:
self.key = string.capwords(self.feed_title(div)) self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed') self.key = self.key.replace('Op-ed','Op-Ed')
@ -505,7 +583,7 @@ class NYTimes(BasicNewsRecipe):
self.handle_article(lidiv) self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_headline_index(self): def parse_headline_index(self):
@ -553,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
for h3_item in search_div.findAll('h3'): for h3_item in search_div.findAll('h3'):
byline = h3_item.h6 byline = h3_item.h6
if byline is not None: if byline is not None:
author = self.tag_to_string(byline,usa_alt=False) author = self.tag_to_string(byline,use_alt=False)
else: else:
author = '' author = ''
a = h3_item.find('a', href=True) a = h3_item.find('a', href=True)
@ -579,7 +657,7 @@ class NYTimes(BasicNewsRecipe):
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)] self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans) return self.filter_ans(self.get_tech_feeds(self.ans))
def parse_index(self): def parse_index(self):
if self.headlinesOnly: if self.headlinesOnly:
@ -589,40 +667,198 @@ class NYTimes(BasicNewsRecipe):
else: else:
return self.parse_todays_index() return self.parse_todays_index()
def strip_anchors(self,soup): def strip_anchors(self,soup,kill_all=False):
paras = soup.findAll(True) paras = soup.findAll(True)
for para in paras: for para in paras:
aTags = para.findAll('a') aTags = para.findAll('a')
for a in aTags: for a in aTags:
if a.img is None: if a.img is None:
a.replaceWith(a.renderContents().decode('cp1252','replace')) if kill_all or (self.recursions==0):
a.replaceWith(self.tag_to_string(a,False))
else:
if a.has_key('href'):
if a['href'].startswith('http://www.nytimes'):
if not a['href'].endswith('pagewanted=all'):
url = re.sub(r'\?.*', '', a['href'])
if self.exclude_url(url):
a.replaceWith(self.tag_to_string(a,False))
else:
a['href'] = url+'?pagewanted=all'
elif not (a['href'].startswith('http://pogue') or \
a['href'].startswith('http://bits') or \
a['href'].startswith('http://travel') or \
a['href'].startswith('http://business') or \
a['href'].startswith('http://tech') or \
a['href'].startswith('http://health') or \
a['href'].startswith('http://dealbook') or \
a['href'].startswith('http://open')):
a.replaceWith(self.tag_to_string(a,False))
return soup
def handle_tags(self,soup):
try:
print("HANDLE TAGS: TITLE = "+self.tag_to_string(soup.title))
except:
print("HANDLE TAGS: NO TITLE")
if soup is None:
print("ERROR: handle_tags received NoneType")
return None
## print("HANDLING AD FORWARD:")
## print(soup)
if self.keep_only_tags:
body = Tag(soup, 'body')
try:
if isinstance(self.keep_only_tags, dict):
self.keep_only_tags = [self.keep_only_tags]
for spec in self.keep_only_tags:
for tag in soup.find('body').findAll(**spec):
body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body)
except AttributeError: # soup has no body element
pass
def remove_beyond(tag, next):
while tag is not None and getattr(tag, 'name', None) != 'body':
after = getattr(tag, next)
while after is not None:
ns = getattr(tag, next)
after.extract()
after = ns
tag = tag.parent
if self.remove_tags_after is not None:
rt = [self.remove_tags_after] if isinstance(self.remove_tags_after, dict) else self.remove_tags_after
for spec in rt:
tag = soup.find(**spec)
remove_beyond(tag, 'nextSibling')
if self.remove_tags_before is not None:
tag = soup.find(**self.remove_tags_before)
remove_beyond(tag, 'previousSibling')
for kwds in self.remove_tags:
for tag in soup.findAll(**kwds):
tag.extract()
return soup return soup
def preprocess_html(self, soup): def preprocess_html(self, soup):
if self.webEdition & (self.oldest_article>0): print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
date_tag = soup.find(True,attrs={'class': ['dateline','date']}) skip_tag = soup.find(True, {'name':'skip'})
if date_tag: if skip_tag is not None:
date_str = self.tag_to_string(date_tag,use_alt=False) url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
date_str = date_str.replace('Published:','') url += '?pagewanted=all'
date_items = date_str.split(',') self.log.warn("Skipping ad to article at '%s'" % url)
try: sleep(5)
datestring = date_items[0]+' '+date_items[1] soup = self.handle_tags(self.article_to_soup(url))
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
#all articles are from today, no need to print the date on every page # check if the article is from one of the tech blogs
try: blog=soup.find('div',attrs={'id':['pogue','bits','gadgetwise','open']})
if not self.webEdition:
date_tag = soup.find(True,attrs={'class': ['dateline','date']}) if blog is not None:
if date_tag: old_body = soup.find('body')
date_tag.extract() new_body=Tag(soup,'body')
except: new_body.append(soup.find('div',attrs={'id':'content'}))
self.log("Error removing the published date") new_body.find('div',attrs={'id':'content'})['id']='blogcontent' # identify for postprocess_html
old_body.replaceWith(new_body)
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
if divr.find(text=re.compile('Sign up')):
divr.extract()
divr = soup.find('div',attrs={'id':re.compile('related-content')})
if divr is not None:
# handle related articles
rlist = []
ul = divr.find('ul')
if ul is not None:
for li in ul.findAll('li'):
atag = li.find('a')
if atag is not None:
if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \
atag['href'].startswith('http://open'):
atag.find(text=True).replaceWith(self.massageNCXText(self.tag_to_string(atag,False)))
rlist.append(atag)
divr.extract()
if rlist != []:
asidediv = Tag(soup,'div',[('class','aside')])
if soup.find('hr') is None:
asidediv.append(Tag(soup,'hr'))
h4 = Tag(soup,'h4',[('class','asidenote')])
h4.insert(0,"Related Posts")
asidediv.append(h4)
ul = Tag(soup,'ul')
for r in rlist:
li = Tag(soup,'li',[('class','aside')])
r['class'] = 'aside'
li.append(r)
ul.append(li)
asidediv.append(ul)
asidediv.append(Tag(soup,'hr'))
smain = soup.find('body')
smain.append(asidediv)
for atag in soup.findAll('a'):
img = atag.find('img')
if img is not None:
atag.replaceWith(img)
elif not atag.has_key('href'):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
hdr = soup.find('address')
if hdr is not None:
hdr.name='span'
for span_credit in soup.findAll('span','credit'):
sp = Tag(soup,'span')
span_credit.replaceWith(sp)
sp.append(Tag(soup,'br'))
sp.append(span_credit)
sp.append(Tag(soup,'br'))
else: # nytimes article
related = [] # these will be the related articles
first_outer = None # first related outer tag
first_related = None # first related tag
for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
for rdiv in soup.findAll('div','columnGroup doubleRule'):
if rdiv.find('h3') is not None:
if self.tag_to_string(rdiv.h3,False).startswith('Related'):
rdiv.h3.find(text=True).replaceWith("Related articles")
rdiv.h3['class'] = 'asidenote'
for litag in rdiv.findAll('li'):
if litag.find('a') is not None:
if litag.find('a')['href'].startswith('http://www.nytimes.com'):
url = re.sub(r'\?.*', '', litag.find('a')['href'])
litag.find('a')['href'] = url+'?pagewanted=all'
litag.extract()
related.append(litag)
if first_related is None:
first_related = rdiv
first_outer = outerdiv
else:
litag.extract()
if related != []:
for r in related:
if r.h6: # don't want the anchor inside a h6 tag
r.h6.replaceWith(r.h6.a)
first_related.ul.append(r)
first_related.insert(0,Tag(soup,'hr'))
first_related.append(Tag(soup,'hr'))
first_related['class'] = 'aside'
first_outer.replaceWith(first_related) # replace the outer tag with the related tag
for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
rdiv.extract()
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
if tagline=='Op-Ed Columnist':
img_div = soup.find('div','inlineImage module')
if img_div:
img_div.extract()
if self.useHighResImages: if self.useHighResImages:
try: try:
@ -667,26 +903,6 @@ class NYTimes(BasicNewsRecipe):
except Exception: except Exception:
self.log("Error pulling high resolution images") self.log("Error pulling high resolution images")
try:
#remove "Related content" bar
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
if runAroundsFound:
for runAround in runAroundsFound:
#find all section headers
hlines = runAround.findAll(True ,{'class':['sectionHeader','sectionHeader flushBottom']})
if hlines:
for hline in hlines:
hline.extract()
#find all section headers
hlines = runAround.findAll('h6')
if hlines:
for hline in hlines:
hline.extract()
except:
self.log("Error removing related content bar")
try: try:
#in case pulling images failed, delete the enlarge this text #in case pulling images failed, delete the enlarge this text
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'}) enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
@ -696,9 +912,24 @@ class NYTimes(BasicNewsRecipe):
except: except:
self.log("Error removing Enlarge this text") self.log("Error removing Enlarge this text")
return self.strip_anchors(soup)
def postprocess_html(self,soup, True): return self.strip_anchors(soup,False)
def postprocess_html(self,soup,first_fetch):
if not first_fetch: # remove Related links
for aside in soup.findAll('div','aside'):
aside.extract()
soup = self.strip_anchors(soup,True)
if soup.find('div',attrs={'id':'blogcontent'}) is None:
if first_fetch:
aside = soup.find('div','aside')
if aside is not None: # move the related list to the end of the article
art = soup.find('div',attrs={'id':'article'})
if art is None:
art = soup.find('div',attrs={'class':'article'})
if art is not None:
art.append(aside)
try: try:
if self.one_picture_per_article: if self.one_picture_per_article:
# Remove all images after first # Remove all images after first
@ -855,23 +1086,22 @@ class NYTimes(BasicNewsRecipe):
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS") self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
return soup return soup
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'): if not first:
return
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'}) idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
if idxdiv is not None: if idxdiv is not None:
if idxdiv.img: if idxdiv.img:
self.add_toc_thumbnail(article, idxdiv.img['src']) self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',idxdiv.img['src']))
else: else:
img = soup.find('img') img = soup.find('body').find('img')
if img is not None: if img is not None:
self.add_toc_thumbnail(article, img['src']) self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',img['src']))
shortparagraph = "" shortparagraph = ""
try: try:
if len(article.text_summary.strip()) == 0: if len(article.text_summary.strip()) == 0:
articlebodies = soup.findAll('div',attrs={'class':'articleBody'}) articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
if not articlebodies: #added to account for blog formats
articlebodies = soup.findAll('div', attrs={'class':'entry-content'}) #added to account for blog formats
if articlebodies: if articlebodies:
for articlebody in articlebodies: for articlebody in articlebodies:
if articlebody: if articlebody:
@ -880,15 +1110,23 @@ class NYTimes(BasicNewsRecipe):
refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip() refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
#account for blank paragraphs and short paragraphs by appending them to longer ones #account for blank paragraphs and short paragraphs by appending them to longer ones
if len(refparagraph) > 0: if len(refparagraph) > 0:
if len(refparagraph) > 140: #approximately two lines of text if len(refparagraph) > 70: #approximately one line of text
article.summary = article.text_summary = shortparagraph + refparagraph newpara = shortparagraph + refparagraph
newparaDateline,newparaEm,newparaDesc = newpara.partition('&mdash;')
if newparaEm == '':
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
if newparaEm == '':
newparaDesc = newparaDateline
article.summary = article.text_summary = newparaDesc.strip()
return return
else: else:
shortparagraph = refparagraph + " " shortparagraph = refparagraph + " "
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"): if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
shortparagraph = shortparagraph + "- " shortparagraph = shortparagraph + "- "
else:
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
except: except:
self.log("Error creating article descriptions") self.log("Error creating article descriptions")
return return

View File

@ -8,19 +8,19 @@ Fetch sueddeutsche.de
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class Sueddeutsche(BasicNewsRecipe): class Sueddeutsche(BasicNewsRecipe):
title = u'Süddeutsche.de' # 2012-01-26 AGe Correct Title title = u'Süddeutsche.de'
description = 'News from Germany, Access to online content' # 2012-01-26 AGe description = 'News from Germany, Access to online content'
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26 __author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-12-05
publisher = u'Süddeutsche Zeitung' # 2012-01-26 AGe add publisher = u'Süddeutsche Zeitung'
category = 'news, politics, Germany' # 2012-01-26 AGe add category = 'news, politics, Germany'
timefmt = ' [%a, %d %b %Y]' # 2012-01-26 AGe add %a timefmt = ' [%a, %d %b %Y]'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
language = 'de' language = 'de'
encoding = 'utf-8' encoding = 'utf-8'
publication_type = 'newspaper' # 2012-01-26 add publication_type = 'newspaper'
cover_source = 'http://www.sueddeutsche.de/verlag' # 2012-01-26 AGe add from Darko Miletic paid content source cover_source = 'http://www.sueddeutsche.de/verlag' # 2012-01-26 AGe add from Darko Miletic paid content source
masthead_url = 'http://www.sueddeutsche.de/static_assets/build/img/sdesiteheader/logo_homepage.441d531c.png' # 2012-01-26 AGe add masthead_url = 'http://www.sueddeutsche.de/static_assets/img/sdesiteheader/logo_standard.a152b0df.png' # 2012-12-05 AGe add
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
@ -40,9 +40,9 @@ class Sueddeutsche(BasicNewsRecipe):
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'), (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'), (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'), (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
(u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'), #2012-01-26 AGe New (u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'),
(u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'), #2012-01-26 AGe New (u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'),
(u'Stil', u'http://rss.sueddeutsche.de/rss/stil'), #2012-01-26 AGe New (u'Stil', u'http://rss.sueddeutsche.de/rss/stil'),
(u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'), (u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'), (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'), (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),

View File

@ -2,8 +2,8 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '4 February 2011, desUBIKado' __copyright__ = '4 February 2011, desUBIKado'
__author__ = 'desUBIKado' __author__ = 'desUBIKado'
__version__ = 'v0.08' __version__ = 'v0.09'
__date__ = '30, June 2012' __date__ = '02, December 2012'
''' '''
http://www.weblogssl.com/ http://www.weblogssl.com/
''' '''
@ -37,6 +37,7 @@ class weblogssl(BasicNewsRecipe):
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx') ,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil') ,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid') ,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
,(u'Xataka Windows', u'http://feeds.weblogssl.com/xatakawindows')
,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto') ,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon') ,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia') ,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
@ -80,19 +81,31 @@ class weblogssl(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}), keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
dict(name='div', attrs={'class':'post'}), dict(name='div', attrs={'class':'post'}),
dict(name='div', attrs={'id':'blog-comments'}) dict(name='div', attrs={'id':'blog-comments'}),
dict(name='div', attrs={'class':'container'}) #m.xataka.com
] ]
remove_tags = [dict(name='div', attrs={'id':'comment-nav'})] remove_tags = [dict(name='div', attrs={'id':'comment-nav'}),
dict(name='menu', attrs={'class':'social-sharing'}), #m.xataka.com
dict(name='section' , attrs={'class':'comments'}), #m.xataka.com
dict(name='div' , attrs={'class':'article-comments'}), #m.xataka.com
dict(name='nav' , attrs={'class':'article-taxonomy'}) #m.xataka.com
]
remove_tags_after = dict(name='section' , attrs={'class':'comments'})
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.', 'http://m.') return url.replace('http://www.', 'http://m.')
preprocess_regexps = [ preprocess_regexps = [
# Para poner una linea en blanco entre un comentario y el siguiente # Para poner una linea en blanco entre un comentario y el siguiente
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c') (re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c'),
# Para ver las imágenes en las noticias de m.xataka.com
(re.compile(r'<noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
(re.compile(r'</noscript>', re.DOTALL|re.IGNORECASE), lambda m: '')
] ]
# Para sustituir el video incrustado de YouTube por una imagen # Para sustituir el video incrustado de YouTube por una imagen
def preprocess_html(self, soup): def preprocess_html(self, soup):
@ -108,14 +121,16 @@ class weblogssl(BasicNewsRecipe):
# Para obtener la url original del articulo a partir de la de "feedsportal" # Para obtener la url original del articulo a partir de la de "feedsportal"
# El siguiente código es gracias al usuario "bosplans" de www.mobileread.com # El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
# http://www.mobileread.com/forums/sho...d.php?t=130297 # http://www.mobileread.com/forums/showthread.php?t=130297
def get_article_url(self, article): def get_article_url(self, article):
link = article.get('link', None) link = article.get('link', None)
if link is None: if link is None:
return article return article
# if link.split('/')[-4]=="xataka2":
# return article.get('feedburner_origlink', article.get('link', article.get('guid')))
if link.split('/')[-4]=="xataka2": if link.split('/')[-4]=="xataka2":
return article.get('feedburner_origlink', article.get('link', article.get('guid'))) return article.get('guid', None)
if link.split('/')[-1]=="story01.htm": if link.split('/')[-1]=="story01.htm":
link=link.split('/')[-2] link=link.split('/')[-2]
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A'] a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']

View File

@ -9,15 +9,15 @@ class Zaman (BasicNewsRecipe):
__author__ = u'thomass' __author__ = u'thomass'
oldest_article = 2 oldest_article = 2
max_articles_per_feed =50 max_articles_per_feed =50
# no_stylesheets = True no_stylesheets = True
#delay = 1 #delay = 1
#use_embedded_content = False use_embedded_content = False
encoding = 'ISO 8859-9' encoding = 'utf-8'
publisher = 'Zaman' publisher = 'Feza Gazetecilik'
category = 'news, haberler,TR,gazete' category = 'news, haberler,TR,gazete'
language = 'tr' language = 'tr'
publication_type = 'newspaper ' publication_type = 'newspaper '
extra_css = '.buyukbaslik{font-weight: bold; font-size: 18px;color:#0000FF}'#body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} ' extra_css = 'h1{text-transform: capitalize; font-weight: bold; font-size: 22px;color:#0000FF} p{text-align:justify} ' #.introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
conversion_options = { conversion_options = {
'tags' : category 'tags' : category
,'language' : language ,'language' : language
@ -26,25 +26,26 @@ class Zaman (BasicNewsRecipe):
} }
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg' cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png' masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
ignore_duplicate_articles = { 'title', 'url' }
auto_cleanup = False
remove_empty_feeds= True
#keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ] #keep_only_tags = [dict(name='div', attrs={'id':[ 'contentposition19']})]#,dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}), ]
remove_tags = [ dict(name='img', attrs={'src':['http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif']})]#,dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']}) remove_tags = [ dict(name='img', attrs={'src':['http://cmsmedya.zaman.com.tr/images/logo/logo.bmp']}),dict(name='hr', attrs={'class':['interactive-hr']})]# remove_tags = [ dict(name='div', attrs={'class':[ 'detayUyari']}),dict(name='div', attrs={'class':[ 'detayYorum']}),dict(name='div', attrs={'class':[ 'addthis_toolbox addthis_default_style ']}),dict(name='div', attrs={'id':[ 'tumYazi']})]#,dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']}),dict(name='div', attrs={'id':[ 'news-detail-gallery']}),dict(name='div', attrs={'id':[ 'news-detail-title-bottom-part']}),dict(name='div', attrs={'id':[ 'news-detail-news-paging-main']})]#
#remove_attributes = ['width','height'] #remove_attributes = ['width','height']
remove_empty_feeds= True remove_empty_feeds= True
feeds = [ feeds = [
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'), ( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
#( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
#( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
( u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'), ( u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
( u'Politika', u'http://www.zaman.com.tr/politika.rss'), ( u'Politika', u'http://www.zaman.com.tr/politika.rss'),
( u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'), ( u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
( u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'), ( u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
( u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'), ( u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
( u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'), ( u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
( u'Dizi Yazı', u'http://www.zaman.com.tr/dizi.rss'), ( u'Dizi Yazı', u'http://www.zaman.com.tr/dizi.rss'),
@ -59,8 +60,9 @@ class Zaman (BasicNewsRecipe):
( u'Cuma Eki', u'http://www.zaman.com.tr/cuma.rss'), ( u'Cuma Eki', u'http://www.zaman.com.tr/cuma.rss'),
( u'Cumaertesi Eki', u'http://www.zaman.com.tr/cumaertesi.rss'), ( u'Cumaertesi Eki', u'http://www.zaman.com.tr/cumaertesi.rss'),
( u'Pazar Eki', u'http://www.zaman.com.tr/pazar.rss'), ( u'Pazar Eki', u'http://www.zaman.com.tr/pazar.rss'),
( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
] ]
def print_version(self, url): def print_version(self, url):
return url.replace('http://www.zaman.com.tr/haber.do?haberno=', 'http://www.zaman.com.tr/yazdir.do?haberno=') return url.replace('http://www.zaman.com.tr/newsDetail_getNewsById.action?newsId=', 'http://www.zaman.com.tr/newsDetail_openPrintPage.action?newsId=')

View File

@ -215,6 +215,8 @@ class Command(object):
sys.stdout.flush() sys.stdout.flush()
def installer_name(ext, is64bit=False): def installer_name(ext, is64bit=False):
if is64bit and ext == 'msi':
return 'dist/%s-64bit-%s.msi'%(__appname__, __version__)
if ext in ('exe', 'msi'): if ext in ('exe', 'msi'):
return 'dist/%s-%s.%s'%(__appname__, __version__, ext) return 'dist/%s-%s.%s'%(__appname__, __version__, ext)
if ext == 'dmg': if ext == 'dmg':

View File

@ -11,12 +11,11 @@ from distutils.spawn import find_executable
from PyQt4 import pyqtconfig from PyQt4 import pyqtconfig
from setup import isosx, iswindows, islinux from setup import isosx, iswindows, islinux, is64bit
OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk' OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk'
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5' os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5'
is64bit = sys.maxsize > 2**32
NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None
if iswindows: if iswindows:

View File

@ -20,7 +20,7 @@ __all__ = [
'upload_user_manual', 'upload_demo', 'reupload', 'upload_user_manual', 'upload_demo', 'reupload',
'linux32', 'linux64', 'linux', 'linux_freeze', 'linux32', 'linux64', 'linux', 'linux_freeze',
'osx32_freeze', 'osx', 'rsync', 'push', 'osx32_freeze', 'osx', 'rsync', 'push',
'win32_freeze', 'win32', 'win', 'win32_freeze', 'win32', 'win64', 'win',
'stage1', 'stage2', 'stage3', 'stage4', 'stage5', 'publish' 'stage1', 'stage2', 'stage3', 'stage4', 'stage5', 'publish'
] ]
@ -91,9 +91,10 @@ osx = OSX()
from setup.installer.osx.app.main import OSX32_Freeze from setup.installer.osx.app.main import OSX32_Freeze
osx32_freeze = OSX32_Freeze() osx32_freeze = OSX32_Freeze()
from setup.installer.windows import Win, Win32 from setup.installer.windows import Win, Win32, Win64
win = Win() win = Win()
win32 = Win32() win32 = Win32()
win64 = Win64()
from setup.installer.windows.freeze import Win32Freeze from setup.installer.windows.freeze import Win32Freeze
win32_freeze = Win32Freeze() win32_freeze = Win32Freeze()

View File

@ -1,12 +1,12 @@
/* /*
* Memory DLL loading code * Memory DLL loading code
* Version 0.0.2 with additions from Thomas Heller * Version 0.0.3
* *
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de * Copyright (c) 2004-2012 by Joachim Bauch / mail@joachim-bauch.de
* http://www.joachim-bauch.de * http://www.joachim-bauch.de
* *
* The contents of this file are subject to the Mozilla Public License Version * The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with * 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/ * http://www.mozilla.org/MPL/
* *
@ -19,19 +19,25 @@
* *
* The Initial Developer of the Original Code is Joachim Bauch. * The Initial Developer of the Original Code is Joachim Bauch.
* *
* Portions created by Joachim Bauch are Copyright (C) 2004-2005 * Portions created by Joachim Bauch are Copyright (C) 2004-2012
* Joachim Bauch. All Rights Reserved. * Joachim Bauch. All Rights Reserved.
* *
* Portions Copyright (C) 2005 Thomas Heller.
*
*/ */
#ifndef __GNUC__
// disable warnings about pointer <-> DWORD conversions // disable warnings about pointer <-> DWORD conversions
#pragma warning( disable : 4311 4312 ) #pragma warning( disable : 4311 4312 )
#endif
#ifdef _WIN64
#define POINTER_TYPE ULONGLONG
#else
#define POINTER_TYPE DWORD
#endif
#include <Windows.h> #include <Windows.h>
#include <winnt.h> #include <winnt.h>
#if DEBUG_OUTPUT #ifdef DEBUG_OUTPUT
#include <stdio.h> #include <stdio.h>
#endif #endif
@ -39,136 +45,22 @@
// Vista SDKs no longer define IMAGE_SIZEOF_BASE_RELOCATION!? // Vista SDKs no longer define IMAGE_SIZEOF_BASE_RELOCATION!?
#define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION)) #define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION))
#endif #endif
#include "MemoryModule.h" #include "MemoryModule.h"
/* typedef struct {
XXX We need to protect at least walking the 'loaded' linked list with a lock!
*/
/******************************************************************/
FINDPROC findproc;
void *findproc_data = NULL;
struct NAME_TABLE {
char *name;
DWORD ordinal;
};
typedef struct tagMEMORYMODULE {
PIMAGE_NT_HEADERS headers; PIMAGE_NT_HEADERS headers;
unsigned char *codeBase; unsigned char *codeBase;
HMODULE *modules; HMODULE *modules;
int numModules; int numModules;
int initialized; int initialized;
struct NAME_TABLE *name_table;
char *name;
int refcount;
struct tagMEMORYMODULE *next, *prev;
} MEMORYMODULE, *PMEMORYMODULE; } MEMORYMODULE, *PMEMORYMODULE;
typedef BOOL (WINAPI *DllEntryProc)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved); typedef BOOL (WINAPI *DllEntryProc)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved);
#define GET_HEADER_DICTIONARY(module, idx) &(module)->headers->OptionalHeader.DataDirectory[idx] #define GET_HEADER_DICTIONARY(module, idx) &(module)->headers->OptionalHeader.DataDirectory[idx]
MEMORYMODULE *loaded; /* linked list of loaded memory modules */ #ifdef DEBUG_OUTPUT
/* private - insert a loaded library in a linked list */
static void _Register(char *name, MEMORYMODULE *module)
{
module->next = loaded;
if (loaded)
loaded->prev = module;
module->prev = NULL;
loaded = module;
}
/* private - remove a loaded library from a linked list */
static void _Unregister(MEMORYMODULE *module)
{
free(module->name);
if (module->prev)
module->prev->next = module->next;
if (module->next)
module->next->prev = module->prev;
if (module == loaded)
loaded = module->next;
}
/* public - replacement for GetModuleHandle() */
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName)
{
MEMORYMODULE *p = loaded;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(lpModuleName, p->name)) {
return (HMODULE)p;
}
p = p->next;
}
return GetModuleHandle(lpModuleName);
}
/*
 * public - replacement for LoadLibrary() that searches FIRST for memory
 * libraries, then for normal libraries.
 *
 * Lookup order:
 *   1. a memory module with the same (case-insensitive) name that is already
 *      loaded: its reference count is incremented and it is returned;
 *   2. if both `findproc` and `findproc_data` are set and findproc() yields
 *      data for the name: the library is loaded AS memory module from it;
 *   3. otherwise the regular LoadLibrary().
 *
 * If step 2 finds data but MemoryLoadLibrary() fails, NULL is returned and
 * there is no fallback to LoadLibrary().
 *
 * A NULL file name skips the memory-module steps (they would dereference the
 * name) and is handed straight to LoadLibrary(), which reports the error.
 */
HMODULE MyLoadLibrary(char *lpFileName)
{
    MEMORYMODULE *p;

    if (lpFileName != NULL) {
        for (p = loaded; p != NULL; p = p->next) {
            /* If already loaded, only increment the reference count */
            if (0 == stricmp(lpFileName, p->name)) {
                p->refcount++;
                return (HMODULE)p;
            }
        }

        if (findproc && findproc_data) {
            void *pdata = findproc(lpFileName, findproc_data);
            if (pdata) {
                /*
                 * NOTE(review): the previous code called free() on the list
                 * cursor here, which is always NULL after the search loop and
                 * therefore did nothing. If the buffer returned by findproc()
                 * is supposed to be released by the caller, that has to be
                 * done on `pdata` - confirm ownership before adding it.
                 */
                return MemoryLoadLibrary(lpFileName, pdata);
            }
        }
    }
    return LoadLibrary(lpFileName);
}
/*
 * public - replacement for GetProcAddress().
 *
 * If `hModule` is one of the memory modules in the `loaded` list the symbol
 * is resolved with MemoryGetProcAddress(); any other handle is passed on to
 * the regular GetProcAddress().
 */
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName)
{
    MEMORYMODULE *node;

    for (node = loaded; node != NULL; node = node->next) {
        if ((HMODULE)node == hModule) {
            return MemoryGetProcAddress(node, lpProcName);
        }
    }
    return GetProcAddress(hModule, lpProcName);
}
/*
 * public - replacement for FreeLibrary().
 *
 * For a handle that belongs to a memory module the reference count is
 * decremented; when it reaches zero the module is removed from the `loaded`
 * list and released. TRUE is returned in both cases. Handles that are not
 * memory modules are forwarded to the regular FreeLibrary().
 */
BOOL MyFreeLibrary(HMODULE hModule)
{
    MEMORYMODULE *node = loaded;

    /* find the list entry whose address is the handle */
    while (node != NULL && (HMODULE)node != hModule) {
        node = node->next;
    }

    if (node == NULL) {
        return FreeLibrary(hModule);
    }

    node->refcount -= 1;
    if (node->refcount == 0) {
        _Unregister(node);
        MemoryFreeLibrary(node);
    }
    return TRUE;
}
#if DEBUG_OUTPUT
static void static void
OutputLastError(const char *msg) OutputLastError(const char *msg)
{ {
@ -184,20 +76,6 @@ OutputLastError(const char *msg)
} }
#endif #endif
/*
static int dprintf(char *fmt, ...)
{
char Buffer[4096];
va_list marker;
int result;
va_start(marker, fmt);
result = vsprintf(Buffer, fmt, marker);
OutputDebugString(Buffer);
return result;
}
*/
static void static void
CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMODULE module) CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMODULE module)
{ {
@ -205,15 +83,12 @@ CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMO
unsigned char *codeBase = module->codeBase; unsigned char *codeBase = module->codeBase;
unsigned char *dest; unsigned char *dest;
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers); PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) {
{ if (section->SizeOfRawData == 0) {
if (section->SizeOfRawData == 0)
{
// section doesn't contain data in the dll itself, but may define // section doesn't contain data in the dll itself, but may define
// uninitialized data // uninitialized data
size = old_headers->OptionalHeader.SectionAlignment; size = old_headers->OptionalHeader.SectionAlignment;
if (size > 0) if (size > 0) {
{
dest = (unsigned char *)VirtualAlloc(codeBase + section->VirtualAddress, dest = (unsigned char *)VirtualAlloc(codeBase + section->VirtualAddress,
size, size,
MEM_COMMIT, MEM_COMMIT,
@ -255,66 +130,72 @@ FinalizeSections(PMEMORYMODULE module)
{ {
int i; int i;
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers); PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
#ifdef _WIN64
POINTER_TYPE imageOffset = (module->headers->OptionalHeader.ImageBase & 0xffffffff00000000);
#else
#define imageOffset 0
#endif
// loop through all sections and change access flags // loop through all sections and change access flags
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) {
{
DWORD protect, oldProtect, size; DWORD protect, oldProtect, size;
int executable = (section->Characteristics & IMAGE_SCN_MEM_EXECUTE) != 0; int executable = (section->Characteristics & IMAGE_SCN_MEM_EXECUTE) != 0;
int readable = (section->Characteristics & IMAGE_SCN_MEM_READ) != 0; int readable = (section->Characteristics & IMAGE_SCN_MEM_READ) != 0;
int writeable = (section->Characteristics & IMAGE_SCN_MEM_WRITE) != 0; int writeable = (section->Characteristics & IMAGE_SCN_MEM_WRITE) != 0;
if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {
{
// section is not needed any more and can safely be freed // section is not needed any more and can safely be freed
VirtualFree((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, MEM_DECOMMIT); VirtualFree((LPVOID)((POINTER_TYPE)section->Misc.PhysicalAddress | imageOffset), section->SizeOfRawData, MEM_DECOMMIT);
continue; continue;
} }
// determine protection flags based on characteristics // determine protection flags based on characteristics
protect = ProtectionFlags[executable][readable][writeable]; protect = ProtectionFlags[executable][readable][writeable];
if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED) if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED) {
protect |= PAGE_NOCACHE; protect |= PAGE_NOCACHE;
}
// determine size of region // determine size of region
size = section->SizeOfRawData; size = section->SizeOfRawData;
if (size == 0) if (size == 0) {
{ if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) {
if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
size = module->headers->OptionalHeader.SizeOfInitializedData; size = module->headers->OptionalHeader.SizeOfInitializedData;
else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) } else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) {
size = module->headers->OptionalHeader.SizeOfUninitializedData; size = module->headers->OptionalHeader.SizeOfUninitializedData;
} }
}
if (size > 0) if (size > 0) {
{
// change memory access flags // change memory access flags
if (VirtualProtect((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, protect, &oldProtect) == 0) if (VirtualProtect((LPVOID)((POINTER_TYPE)section->Misc.PhysicalAddress | imageOffset), size, protect, &oldProtect) == 0)
#if DEBUG_OUTPUT #ifdef DEBUG_OUTPUT
OutputLastError("Error protecting memory page") OutputLastError("Error protecting memory page")
#endif #endif
; ;
} }
} }
#ifndef _WIN64
#undef imageOffset
#endif
} }
static void static void
PerformBaseRelocation(PMEMORYMODULE module, DWORD delta) PerformBaseRelocation(PMEMORYMODULE module, SIZE_T delta)
{ {
DWORD i; DWORD i;
unsigned char *codeBase = module->codeBase; unsigned char *codeBase = module->codeBase;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_BASERELOC); PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_BASERELOC);
if (directory->Size > 0) if (directory->Size > 0) {
{
PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION) (codeBase + directory->VirtualAddress); PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION) (codeBase + directory->VirtualAddress);
for (; relocation->VirtualAddress > 0; ) for (; relocation->VirtualAddress > 0; ) {
{ unsigned char *dest = codeBase + relocation->VirtualAddress;
unsigned char *dest = (unsigned char *)(codeBase + relocation->VirtualAddress);
unsigned short *relInfo = (unsigned short *)((unsigned char *)relocation + IMAGE_SIZEOF_BASE_RELOCATION); unsigned short *relInfo = (unsigned short *)((unsigned char *)relocation + IMAGE_SIZEOF_BASE_RELOCATION);
for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++) for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++) {
{
DWORD *patchAddrHL; DWORD *patchAddrHL;
#ifdef _WIN64
ULONGLONG *patchAddr64;
#endif
int type, offset; int type, offset;
// the upper 4 bits define the type of relocation // the upper 4 bits define the type of relocation
@ -331,9 +212,16 @@ PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
case IMAGE_REL_BASED_HIGHLOW: case IMAGE_REL_BASED_HIGHLOW:
// change complete 32 bit address // change complete 32 bit address
patchAddrHL = (DWORD *) (dest + offset); patchAddrHL = (DWORD *) (dest + offset);
*patchAddrHL += delta; *patchAddrHL += (DWORD)delta;
break; break;
#ifdef _WIN64
case IMAGE_REL_BASED_DIR64:
patchAddr64 = (ULONGLONG *) (dest + offset);
*patchAddr64 += delta;
break;
#endif
default: default:
//printf("Unknown relocation: %d\n", type); //printf("Unknown relocation: %d\n", type);
break; break;
@ -341,7 +229,7 @@ PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
} }
// advance to next relocation block // advance to next relocation block
relocation = (PIMAGE_BASE_RELOCATION)(((DWORD)relocation) + relocation->SizeOfBlock); relocation = (PIMAGE_BASE_RELOCATION) (((char *) relocation) + relocation->SizeOfBlock);
} }
} }
} }
@ -353,18 +241,13 @@ BuildImportTable(PMEMORYMODULE module)
unsigned char *codeBase = module->codeBase; unsigned char *codeBase = module->codeBase;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_IMPORT); PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_IMPORT);
if (directory->Size > 0) if (directory->Size > 0) {
{
PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR) (codeBase + directory->VirtualAddress); PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR) (codeBase + directory->VirtualAddress);
for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++) for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++) {
{ POINTER_TYPE *thunkRef;
DWORD *thunkRef, *funcRef; FARPROC *funcRef;
HMODULE handle; HMODULE handle = LoadLibrary((LPCSTR) (codeBase + importDesc->Name));
if (handle == NULL) {
handle = MyLoadLibrary(codeBase + importDesc->Name);
if (handle == INVALID_HANDLE_VALUE)
{
//LastError should already be set
#if DEBUG_OUTPUT #if DEBUG_OUTPUT
OutputLastError("Can't load library"); OutputLastError("Can't load library");
#endif #endif
@ -373,81 +256,54 @@ BuildImportTable(PMEMORYMODULE module)
} }
module->modules = (HMODULE *)realloc(module->modules, (module->numModules+1)*(sizeof(HMODULE))); module->modules = (HMODULE *)realloc(module->modules, (module->numModules+1)*(sizeof(HMODULE)));
if (module->modules == NULL) if (module->modules == NULL) {
{
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
result = 0; result = 0;
break; break;
} }
module->modules[module->numModules++] = handle; module->modules[module->numModules++] = handle;
if (importDesc->OriginalFirstThunk) if (importDesc->OriginalFirstThunk) {
{ thunkRef = (POINTER_TYPE *) (codeBase + importDesc->OriginalFirstThunk);
thunkRef = (DWORD *)(codeBase + importDesc->OriginalFirstThunk); funcRef = (FARPROC *) (codeBase + importDesc->FirstThunk);
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
} else { } else {
// no hint table // no hint table
thunkRef = (DWORD *)(codeBase + importDesc->FirstThunk); thunkRef = (POINTER_TYPE *) (codeBase + importDesc->FirstThunk);
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk); funcRef = (FARPROC *) (codeBase + importDesc->FirstThunk);
} }
for (; *thunkRef; thunkRef++, funcRef++) for (; *thunkRef; thunkRef++, funcRef++) {
{ if (IMAGE_SNAP_BY_ORDINAL(*thunkRef)) {
if IMAGE_SNAP_BY_ORDINAL(*thunkRef) { *funcRef = (FARPROC)GetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
} else { } else {
PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME)(codeBase + *thunkRef); PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME) (codeBase + (*thunkRef));
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)&thunkData->Name); *funcRef = (FARPROC)GetProcAddress(handle, (LPCSTR)&thunkData->Name);
} }
if (*funcRef == 0) if (*funcRef == 0) {
{
SetLastError(ERROR_PROC_NOT_FOUND);
result = 0; result = 0;
break; break;
} }
} }
if (!result) if (!result) {
break; break;
} }
} }
}
return result; return result;
} }
/* HMEMORYMODULE MemoryLoadLibrary(const void *data)
MemoryLoadLibrary - load a library AS MEMORY MODULE, or return
existing MEMORY MODULE with increased refcount.
This allows to load a library AGAIN as memory module which is
already loaded as HMODULE!
*/
HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
{ {
PMEMORYMODULE result; PMEMORYMODULE result;
PIMAGE_DOS_HEADER dos_header; PIMAGE_DOS_HEADER dos_header;
PIMAGE_NT_HEADERS old_header; PIMAGE_NT_HEADERS old_header;
unsigned char *code, *headers; unsigned char *code, *headers;
DWORD locationDelta; SIZE_T locationDelta;
DllEntryProc DllEntry; DllEntryProc DllEntry;
BOOL successfull; BOOL successfull;
MEMORYMODULE *p = loaded;
while (p) {
// If already loaded, only increment the reference count
if (0 == stricmp(name, p->name)) {
p->refcount++;
return (HMODULE)p;
}
p = p->next;
}
/* Do NOT check for GetModuleHandle here! */
dos_header = (PIMAGE_DOS_HEADER)data; dos_header = (PIMAGE_DOS_HEADER)data;
if (dos_header->e_magic != IMAGE_DOS_SIGNATURE) if (dos_header->e_magic != IMAGE_DOS_SIGNATURE) {
{
SetLastError(ERROR_BAD_FORMAT);
#if DEBUG_OUTPUT #if DEBUG_OUTPUT
OutputDebugString("Not a valid executable file.\n"); OutputDebugString("Not a valid executable file.\n");
#endif #endif
@ -455,9 +311,7 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
} }
old_header = (PIMAGE_NT_HEADERS)&((const unsigned char *)(data))[dos_header->e_lfanew]; old_header = (PIMAGE_NT_HEADERS)&((const unsigned char *)(data))[dos_header->e_lfanew];
if (old_header->Signature != IMAGE_NT_SIGNATURE) if (old_header->Signature != IMAGE_NT_SIGNATURE) {
{
SetLastError(ERROR_BAD_FORMAT);
#if DEBUG_OUTPUT #if DEBUG_OUTPUT
OutputDebugString("No PE header found.\n"); OutputDebugString("No PE header found.\n");
#endif #endif
@ -470,31 +324,25 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
MEM_RESERVE, MEM_RESERVE,
PAGE_READWRITE); PAGE_READWRITE);
if (code == NULL) if (code == NULL) {
// try to allocate memory at arbitrary position // try to allocate memory at arbitrary position
code = (unsigned char *)VirtualAlloc(NULL, code = (unsigned char *)VirtualAlloc(NULL,
old_header->OptionalHeader.SizeOfImage, old_header->OptionalHeader.SizeOfImage,
MEM_RESERVE, MEM_RESERVE,
PAGE_READWRITE); PAGE_READWRITE);
if (code == NULL) {
if (code == NULL)
{
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
#if DEBUG_OUTPUT #if DEBUG_OUTPUT
OutputLastError("Can't reserve memory"); OutputLastError("Can't reserve memory");
#endif #endif
return NULL; return NULL;
} }
}
result = (PMEMORYMODULE)HeapAlloc(GetProcessHeap(), 0, sizeof(MEMORYMODULE)); result = (PMEMORYMODULE)HeapAlloc(GetProcessHeap(), 0, sizeof(MEMORYMODULE));
result->codeBase = code; result->codeBase = code;
result->numModules = 0; result->numModules = 0;
result->modules = NULL; result->modules = NULL;
result->initialized = 0; result->initialized = 0;
result->next = result->prev = NULL;
result->refcount = 1;
result->name = strdup(name);
result->name_table = NULL;
// XXX: is it correct to commit the complete memory region at once? // XXX: is it correct to commit the complete memory region at once?
// calling DllEntry raises an exception if we don't... // calling DllEntry raises an exception if we don't...
@ -514,31 +362,30 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
result->headers = (PIMAGE_NT_HEADERS)&((const unsigned char *)(headers))[dos_header->e_lfanew]; result->headers = (PIMAGE_NT_HEADERS)&((const unsigned char *)(headers))[dos_header->e_lfanew];
// update position // update position
result->headers->OptionalHeader.ImageBase = (DWORD)code; result->headers->OptionalHeader.ImageBase = (POINTER_TYPE)code;
// copy sections from DLL file block to new memory location // copy sections from DLL file block to new memory location
CopySections(data, old_header, result); CopySections(data, old_header, result);
// adjust base address of imported data // adjust base address of imported data
locationDelta = (DWORD)(code - old_header->OptionalHeader.ImageBase); locationDelta = (SIZE_T)(code - old_header->OptionalHeader.ImageBase);
if (locationDelta != 0) if (locationDelta != 0) {
PerformBaseRelocation(result, locationDelta); PerformBaseRelocation(result, locationDelta);
}
// load required dlls and adjust function table of imports // load required dlls and adjust function table of imports
if (!BuildImportTable(result)) if (!BuildImportTable(result)) {
goto error; goto error;
}
// mark memory pages depending on section headers and release // mark memory pages depending on section headers and release
// sections that are marked as "discardable" // sections that are marked as "discardable"
FinalizeSections(result); FinalizeSections(result);
// get entry point of loaded library // get entry point of loaded library
if (result->headers->OptionalHeader.AddressOfEntryPoint != 0) if (result->headers->OptionalHeader.AddressOfEntryPoint != 0) {
{
DllEntry = (DllEntryProc) (code + result->headers->OptionalHeader.AddressOfEntryPoint); DllEntry = (DllEntryProc) (code + result->headers->OptionalHeader.AddressOfEntryPoint);
if (DllEntry == 0) if (DllEntry == 0) {
{
SetLastError(ERROR_BAD_FORMAT); /* XXX ? */
#if DEBUG_OUTPUT #if DEBUG_OUTPUT
OutputDebugString("Library has no entry point.\n"); OutputDebugString("Library has no entry point.\n");
#endif #endif
@ -547,8 +394,7 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
// notify library about attaching to process // notify library about attaching to process
successfull = (*DllEntry)((HINSTANCE)code, DLL_PROCESS_ATTACH, 0); successfull = (*DllEntry)((HINSTANCE)code, DLL_PROCESS_ATTACH, 0);
if (!successfull) if (!successfull) {
{
#if DEBUG_OUTPUT #if DEBUG_OUTPUT
OutputDebugString("Can't attach library.\n"); OutputDebugString("Can't attach library.\n");
#endif #endif
@ -557,99 +403,55 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
result->initialized = 1; result->initialized = 1;
} }
_Register(name, result);
return (HMEMORYMODULE)result; return (HMEMORYMODULE)result;
error: error:
// cleanup // cleanup
free(result->name);
MemoryFreeLibrary(result); MemoryFreeLibrary(result);
return NULL; return NULL;
} }
/*
 * qsort() comparator: orders two NAME_TABLE entries by name, ignoring case.
 *
 * The parameters are `const void *` so that the function has exactly the
 * comparator type qsort() expects; calling it through an incompatible
 * function-pointer type is undefined behaviour.
 */
int _compare(const void *lhs, const void *rhs)
{
    const struct NAME_TABLE *p1 = (const struct NAME_TABLE *)lhs;
    const struct NAME_TABLE *p2 = (const struct NAME_TABLE *)rhs;

    return stricmp(p1->name, p2->name);
}
/*
 * bsearch() comparator: compares the searched name against one NAME_TABLE
 * entry, ignoring case.
 *
 * `key` points to the `const char *` holding the searched name (bsearch() is
 * given the address of that pointer) and `elem` points to a table entry.
 * Both are `const void *` so the function matches the comparator type
 * bsearch() expects.
 */
int _find(const void *key, const void *elem)
{
    const char *const *name = (const char *const *)key;
    const struct NAME_TABLE *p = (const struct NAME_TABLE *)elem;

    return stricmp(*name, p->name);
}
/*
 * Return the module's export-name table sorted case-insensitively by name,
 * building and caching it in module->name_table on first use.
 *
 * Each entry pairs a name from the export directory's AddressOfNames array
 * with the value at the same index of its AddressOfNameOrdinals array.
 * Returns NULL when the table cannot be allocated.
 */
struct NAME_TABLE *GetNameTable(PMEMORYMODULE module)
{
    unsigned char *base;
    PIMAGE_DATA_DIRECTORY dir;
    PIMAGE_EXPORT_DIRECTORY exp;
    DWORD *names;
    WORD *ordinals;
    struct NAME_TABLE *table;
    DWORD n;

    /* already built on an earlier call */
    if (module->name_table) {
        return module->name_table;
    }

    base = module->codeBase;
    dir = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_EXPORT);
    exp = (PIMAGE_EXPORT_DIRECTORY)(base + dir->VirtualAddress);
    names = (DWORD *)(base + exp->AddressOfNames);
    ordinals = (WORD *)(base + exp->AddressOfNameOrdinals);

    table = (struct NAME_TABLE *)malloc(sizeof(struct NAME_TABLE)
                                        * exp->NumberOfNames);
    module->name_table = table;
    if (table == NULL) {
        return NULL;
    }

    for (n = 0; n < exp->NumberOfNames; ++n) {
        /* names[] holds offsets relative to the module's base address */
        table[n].name = (char *)(base + names[n]);
        table[n].ordinal = ordinals[n];
    }

    /* sorted so that lookups can use bsearch() */
    qsort(table, exp->NumberOfNames, sizeof(struct NAME_TABLE), _compare);
    return table;
}
FARPROC MemoryGetProcAddress(HMEMORYMODULE module, const char *name) FARPROC MemoryGetProcAddress(HMEMORYMODULE module, const char *name)
{ {
unsigned char *codeBase = ((PMEMORYMODULE)module)->codeBase; unsigned char *codeBase = ((PMEMORYMODULE)module)->codeBase;
int idx=-1; int idx=-1;
DWORD i, *nameRef;
WORD *ordinal;
PIMAGE_EXPORT_DIRECTORY exports; PIMAGE_EXPORT_DIRECTORY exports;
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY((PMEMORYMODULE)module, IMAGE_DIRECTORY_ENTRY_EXPORT); PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY((PMEMORYMODULE)module, IMAGE_DIRECTORY_ENTRY_EXPORT);
if (directory->Size == 0) {
if (directory->Size == 0)
// no export table found // no export table found
return NULL; return NULL;
}
exports = (PIMAGE_EXPORT_DIRECTORY) (codeBase + directory->VirtualAddress); exports = (PIMAGE_EXPORT_DIRECTORY) (codeBase + directory->VirtualAddress);
if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0) if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0) {
// DLL doesn't export anything // DLL doesn't export anything
return NULL; return NULL;
}
if (HIWORD(name)) { // search function name in list of exported names
struct NAME_TABLE *ptab; nameRef = (DWORD *) (codeBase + exports->AddressOfNames);
struct NAME_TABLE *found; ordinal = (WORD *) (codeBase + exports->AddressOfNameOrdinals);
ptab = GetNameTable((PMEMORYMODULE)module); for (i=0; i<exports->NumberOfNames; i++, nameRef++, ordinal++) {
if (ptab == NULL) if (_stricmp(name, (const char *) (codeBase + (*nameRef))) == 0) {
// some failure idx = *ordinal;
return NULL; break;
found = bsearch(&name, ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _find); }
if (found == NULL) }
if (idx == -1) {
// exported symbol not found // exported symbol not found
return NULL; return NULL;
idx = found->ordinal;
} }
else
idx = LOWORD(name) - exports->Base;
if ((DWORD)idx > exports->NumberOfFunctions) if ((DWORD)idx > exports->NumberOfFunctions) {
// name <-> ordinal number don't match // name <-> ordinal number don't match
return NULL; return NULL;
}
// AddressOfFunctions contains the RVAs to the "real" functions // AddressOfFunctions contains the RVAs to the "real" functions
return (FARPROC)(codeBase + *(DWORD *)(codeBase + exports->AddressOfFunctions + (idx*4))); return (FARPROC) (codeBase + (*(DWORD *) (codeBase + exports->AddressOfFunctions + (idx*4))));
} }
void MemoryFreeLibrary(HMEMORYMODULE mod) void MemoryFreeLibrary(HMEMORYMODULE mod)
@ -657,32 +459,29 @@ void MemoryFreeLibrary(HMEMORYMODULE mod)
int i; int i;
PMEMORYMODULE module = (PMEMORYMODULE)mod; PMEMORYMODULE module = (PMEMORYMODULE)mod;
if (module != NULL) if (module != NULL) {
{ if (module->initialized != 0) {
if (module->initialized != 0)
{
// notify library about detaching from process // notify library about detaching from process
DllEntryProc DllEntry = (DllEntryProc) (module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint); DllEntryProc DllEntry = (DllEntryProc) (module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint);
(*DllEntry)((HINSTANCE)module->codeBase, DLL_PROCESS_DETACH, 0); (*DllEntry)((HINSTANCE)module->codeBase, DLL_PROCESS_DETACH, 0);
module->initialized = 0; module->initialized = 0;
} }
if (module->modules != NULL) if (module->modules != NULL) {
{
// free previously opened libraries // free previously opened libraries
for (i=0; i<module->numModules; i++) for (i=0; i<module->numModules; i++) {
if (module->modules[i] != INVALID_HANDLE_VALUE) if (module->modules[i] != INVALID_HANDLE_VALUE) {
MyFreeLibrary(module->modules[i]); FreeLibrary(module->modules[i]);
}
}
free(module->modules); free(module->modules);
} }
if (module->codeBase != NULL) if (module->codeBase != NULL) {
// release memory of library // release memory of library
VirtualFree(module->codeBase, 0, MEM_RELEASE); VirtualFree(module->codeBase, 0, MEM_RELEASE);
}
if (module->name_table != NULL)
free(module->name_table);
HeapFree(GetProcessHeap(), 0, module); HeapFree(GetProcessHeap(), 0, module);
} }

View File

@ -1,12 +1,12 @@
/* /*
* Memory DLL loading code * Memory DLL loading code
* Version 0.0.2 * Version 0.0.3
* *
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de * Copyright (c) 2004-2012 by Joachim Bauch / mail@joachim-bauch.de
* http://www.joachim-bauch.de * http://www.joachim-bauch.de
* *
* The contents of this file are subject to the Mozilla Public License Version * The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with * 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/ * http://www.mozilla.org/MPL/
* *
@ -19,7 +19,7 @@
* *
* The Initial Developer of the Original Code is Joachim Bauch. * The Initial Developer of the Original Code is Joachim Bauch.
* *
* Portions created by Joachim Bauch are Copyright (C) 2004-2005 * Portions created by Joachim Bauch are Copyright (C) 2004-2012
* Joachim Bauch. All Rights Reserved. * Joachim Bauch. All Rights Reserved.
* *
*/ */
@ -35,22 +35,12 @@ typedef void *HMEMORYMODULE;
extern "C" { extern "C" {
#endif #endif
typedef void *(*FINDPROC)(); HMEMORYMODULE MemoryLoadLibrary(const void *);
extern FINDPROC findproc;
extern void *findproc_data;
HMEMORYMODULE MemoryLoadLibrary(char *, const void *);
FARPROC MemoryGetProcAddress(HMEMORYMODULE, const char *); FARPROC MemoryGetProcAddress(HMEMORYMODULE, const char *);
void MemoryFreeLibrary(HMEMORYMODULE); void MemoryFreeLibrary(HMEMORYMODULE);
BOOL MyFreeLibrary(HMODULE hModule);
HMODULE MyLoadLibrary(char *lpFileName);
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName);
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -8,53 +8,66 @@ __docformat__ = 'restructuredtext en'
import os, shutil, subprocess import os, shutil, subprocess
from setup import Command, __appname__, __version__ from setup import Command, __appname__, __version__, installer_name
from setup.installer import VMInstaller from setup.installer import VMInstaller
class Win(Command): class Win(Command):
description = 'Build windows binary installers' description = 'Build windows binary installers'
sub_commands = ['win32'] sub_commands = ['win64', 'win32']
def run(self, opts): def run(self, opts):
pass pass
class WinBase(VMInstaller):
class Win32(VMInstaller):
description = 'Build 32bit windows binary installer'
INSTALLER_EXT = 'exe'
VM_NAME = 'xp_build'
VM = '/vmware/bin/%s'%VM_NAME
VM_CHECK = 'calibre_windows_xp_home'
FREEZE_COMMAND = 'win32_freeze' FREEZE_COMMAND = 'win32_freeze'
FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice' FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
INSTALLER_EXT = 'msi' INSTALLER_EXT = 'msi'
SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0'] SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
def sign_msi(self):
print ('Signing installers ...') class Win32(WinBase):
subprocess.check_call(['ssh', self.VM_NAME, '~/sign.sh'], shell=False)
description = 'Build 32bit windows binary installer'
VM_NAME = 'xp_build'
VM = '/vmware/bin/%s'%VM_NAME
VM_CHECK = 'calibre_windows_xp_home'
@property
def msi64(self):
return installer_name('msi', is64bit=True)
def do_dl(self, installer, errmsg):
subprocess.check_call(('scp',
'%s:build/%s/%s'%(self.VM_NAME, __appname__, installer), 'dist'))
if not os.path.exists(installer):
self.warn(errmsg)
raise SystemExit(1)
def download_installer(self): def download_installer(self):
installer = self.installer() installer = self.installer()
if os.path.exists('build/winfrozen'): if os.path.exists('build/winfrozen'):
shutil.rmtree('build/winfrozen') shutil.rmtree('build/winfrozen')
self.sign_msi()
subprocess.check_call(('scp',
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
if not os.path.exists(installer):
self.warn('Failed to freeze')
raise SystemExit(1)
self.do_dl(installer, 'Failed to freeze')
installer = 'dist/%s-portable-installer-%s.exe'%(__appname__, __version__) installer = 'dist/%s-portable-installer-%s.exe'%(__appname__, __version__)
subprocess.check_call(('scp', self.do_dl(installer, 'Failed to get portable installer')
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
if not os.path.exists(installer): class Win64(WinBase):
self.warn('Failed to get portable installer')
raise SystemExit(1) description = 'Build 64bit windows binary installer'
VM_NAME = 'win64'
VM = '/vmware/bin/%s'%VM_NAME
VM_CHECK = 'win64'
IS_64_BIT = True
BUILD_PREFIX = WinBase.BUILD_PREFIX + [
'if [ -f "$HOME/.bash_profile" ] ; then',
' source "$HOME/.bash_profile"',
'fi',
]

View File

@ -25,6 +25,7 @@ LZMA = r'Q:\easylzma\build\easylzma-0.0.8'
VERSION = re.sub('[a-z]\d+', '', __version__) VERSION = re.sub('[a-z]\d+', '', __version__)
WINVER = VERSION+'.0' WINVER = VERSION+'.0'
machine = 'X64' if is64bit else 'X86'
DESCRIPTIONS = { DESCRIPTIONS = {
'calibre' : 'The main calibre program', 'calibre' : 'The main calibre program',
@ -90,6 +91,7 @@ class Win32Freeze(Command, WixMixIn):
if not is64bit: if not is64bit:
self.build_portable() self.build_portable()
self.build_portable_installer() self.build_portable_installer()
self.sign_installers()
def remove_CRT_from_manifests(self): def remove_CRT_from_manifests(self):
''' '''
@ -110,7 +112,7 @@ class Win32Freeze(Command, WixMixIn):
self.info('Removing CRT dependency from manifest of: %s'%bn) self.info('Removing CRT dependency from manifest of: %s'%bn)
# Blank out the bytes corresponding to the dependency specification # Blank out the bytes corresponding to the dependency specification
nraw = repl_pat.sub(lambda m: b' '*len(m.group()), raw) nraw = repl_pat.sub(lambda m: b' '*len(m.group()), raw)
if len(nraw) != len(raw): if len(nraw) != len(raw) or nraw == raw:
raise Exception('Something went wrong with %s'%bn) raise Exception('Something went wrong with %s'%bn)
with open(dll, 'wb') as f: with open(dll, 'wb') as f:
f.write(nraw) f.write(nraw)
@ -132,6 +134,23 @@ class Win32Freeze(Command, WixMixIn):
# used instead # used instead
shutil.copy2(f, tgt) shutil.copy2(f, tgt)
def fix_pyd_bootstraps_in(self, folder):
    '''
    Remove the bootstrap .py modules that sit next to .pyd extension
    modules anywhere below `folder`.

    For every ``name.pyd`` that has a sibling ``name.py``, the .py file is
    checked to contain nothing but the ``__bootstrap__`` stub and then
    ``name.py``, ``name.pyc`` and ``name.pyo`` are deleted (whichever
    exist). .pyd files without a sibling .py are left alone.

    :param folder: Directory to walk recursively.
    :raises Exception: If a sibling .py file contains anything other than
        bootstrap code. The message names that .py file.
    '''
    for dirpath, dirnames, filenames in os.walk(folder):
        for fname in filenames:
            name, ext = os.path.splitext(fname)
            bpy = self.j(dirpath, name + '.py')
            if ext == '.pyd' and os.path.exists(bpy):
                # Use a distinct name for the file handle: re-using the
                # loop variable here made the error message below operate
                # on a file object instead of a file name.
                with open(bpy, 'rb') as src:
                    raw = src.read().strip()
                # The file is read as bytes, so compare against bytes
                if (not raw.startswith(b'def __bootstrap__') or not
                        raw.endswith(b'__bootstrap__()')):
                    raise Exception('The file %r has non'
                            ' bootstrap code'%bpy)
                for suffix in ('.py', '.pyc', '.pyo'):
                    x = self.j(dirpath, name+suffix)
                    if os.path.exists(x):
                        os.remove(x)
def freeze(self): def freeze(self):
shutil.copy2(self.j(self.src_root, 'LICENSE'), self.base) shutil.copy2(self.j(self.src_root, 'LICENSE'), self.base)
@ -184,23 +203,12 @@ class Win32Freeze(Command, WixMixIn):
shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell')) shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
shutil.rmtree(comext) shutil.rmtree(comext)
# Fix PyCrypto, removing the bootstrap .py modules that load the .pyd # Fix PyCrypto and Pillow, removing the bootstrap .py modules that load
# modules, since they do not work when in a zip file # the .pyd modules, since they do not work when in a zip file
for crypto_dir in glob.glob(self.j(sp_dir, 'pycrypto-*', 'Crypto')): for folder in os.listdir(sp_dir):
for dirpath, dirnames, filenames in os.walk(crypto_dir): folder = self.j(sp_dir, folder)
for f in filenames: if os.path.isdir(folder):
name, ext = os.path.splitext(f) self.fix_pyd_bootstraps_in(folder)
if ext == '.pyd':
with open(self.j(dirpath, name+'.py')) as f:
raw = f.read().strip()
if (not raw.startswith('def __bootstrap__') or not
raw.endswith('__bootstrap__()')):
raise Exception('The PyCrypto file %r has non'
' bootstrap code'%self.j(dirpath, f))
for ext in ('.py', '.pyc', '.pyo'):
x = self.j(dirpath, name+ext)
if os.path.exists(x):
os.remove(x)
for pat in (r'PyQt4\uic\port_v3', ): for pat in (r'PyQt4\uic\port_v3', ):
x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0] x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
@ -367,7 +375,7 @@ class Win32Freeze(Command, WixMixIn):
if not self.opts.keep_site: if not self.opts.keep_site:
os.remove(y) os.remove(y)
def run_builder(self, cmd): def run_builder(self, cmd, show_output=False):
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
if p.wait() != 0: if p.wait() != 0:
@ -376,6 +384,9 @@ class Win32Freeze(Command, WixMixIn):
self.info(p.stdout.read()) self.info(p.stdout.read())
self.info(p.stderr.read()) self.info(p.stderr.read())
sys.exit(1) sys.exit(1)
if show_output:
self.info(p.stdout.read())
self.info(p.stderr.read())
def build_portable_installer(self): def build_portable_installer(self):
zf = self.a(self.j('dist', 'calibre-portable-%s.zip.lz'%VERSION)) zf = self.a(self.j('dist', 'calibre-portable-%s.zip.lz'%VERSION))
@ -401,7 +412,7 @@ class Win32Freeze(Command, WixMixIn):
exe = self.j('dist', 'calibre-portable-installer-%s.exe'%VERSION) exe = self.j('dist', 'calibre-portable-installer-%s.exe'%VERSION)
if self.newer(exe, [obj, xobj]): if self.newer(exe, [obj, xobj]):
self.info('Linking', exe) self.info('Linking', exe)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86', cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:'+machine,
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS', '/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
'/LIBPATH:'+(LZMA+r'\lib\Release'), '/LIBPATH:'+(LZMA+r'\lib\Release'),
'/RELEASE', '/MANIFEST', '/MANIFESTUAC:level="asInvoker" uiAccess="false"', '/RELEASE', '/MANIFEST', '/MANIFESTUAC:level="asInvoker" uiAccess="false"',
@ -458,7 +469,7 @@ class Win32Freeze(Command, WixMixIn):
exe = self.j(base, 'calibre-portable.exe') exe = self.j(base, 'calibre-portable.exe')
if self.newer(exe, [obj]): if self.newer(exe, [obj]):
self.info('Linking', exe) self.info('Linking', exe)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86', cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:'+machine,
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS', '/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
'/RELEASE', '/RELEASE',
'/ENTRY:wWinMainCRTStartup', '/ENTRY:wWinMainCRTStartup',
@ -478,6 +489,17 @@ class Win32Freeze(Command, WixMixIn):
subprocess.check_call([LZMA + r'\bin\elzma.exe', '-9', '--lzip', name]) subprocess.check_call([LZMA + r'\bin\elzma.exe', '-9', '--lzip', name])
def sign_installers(self):
    '''
    Sign every installer found in the ``dist`` folder.

    Collects all ``*.msi`` files followed by all ``*.exe`` files under
    ``dist`` and hands them to a single ``signtool.exe sign`` invocation.

    Raises ValueError if no installers are found, and
    subprocess.CalledProcessError if signtool exits with a non-zero status.
    '''
    self.info('Signing installers...')
    installers = []
    for pattern in ('*.msi', '*.exe'):
        installers.extend(glob.glob(self.j('dist', pattern)))
    if len(installers) == 0:
        raise ValueError('No installers found')
    # Fixed signtool arguments; the files to sign are appended at the end
    signtool = [
        'signtool.exe', 'sign', '/a',
        '/d', 'calibre - E-book management',
        '/du', 'http://calibre-ebook.com',
        '/t', 'http://timestamp.verisign.com/scripts/timstamp.dll',
    ]
    subprocess.check_call(signtool + installers)
def add_dir_to_zip(self, zf, path, prefix=''): def add_dir_to_zip(self, zf, path, prefix=''):
''' '''
Add a directory recursively to the zip file with an optional prefix. Add a directory recursively to the zip file with an optional prefix.
@ -499,9 +521,11 @@ class Win32Freeze(Command, WixMixIn):
finally: finally:
os.chdir(cwd) os.chdir(cwd)
def build_launchers(self): def build_launchers(self, debug=False):
if not os.path.exists(self.obj_dir): if not os.path.exists(self.obj_dir):
os.makedirs(self.obj_dir) os.makedirs(self.obj_dir)
dflags = (['/Zi'] if debug else [])
dlflags = (['/DEBUG'] if debug else ['/INCREMENTAL:NO'])
base = self.j(self.src_root, 'setup', 'installer', 'windows') base = self.j(self.src_root, 'setup', 'installer', 'windows')
sources = [self.j(base, x) for x in ['util.c', 'MemoryModule.c']] sources = [self.j(base, x) for x in ['util.c', 'MemoryModule.c']]
headers = [self.j(base, x) for x in ['util.h', 'MemoryModule.h']] headers = [self.j(base, x) for x in ['util.h', 'MemoryModule.h']]
@ -510,20 +534,20 @@ class Win32Freeze(Command, WixMixIn):
cflags += ['/DPYDLL="python%s.dll"'%self.py_ver, '/IC:/Python%s/include'%self.py_ver] cflags += ['/DPYDLL="python%s.dll"'%self.py_ver, '/IC:/Python%s/include'%self.py_ver]
for src, obj in zip(sources, objects): for src, obj in zip(sources, objects):
if not self.newer(obj, headers+[src]): continue if not self.newer(obj, headers+[src]): continue
cmd = [msvc.cc] + cflags + ['/Fo'+obj, '/Tc'+src] cmd = [msvc.cc] + cflags + dflags + ['/Fo'+obj, '/Tc'+src]
self.run_builder(cmd) self.run_builder(cmd, show_output=True)
dll = self.j(self.obj_dir, 'calibre-launcher.dll') dll = self.j(self.obj_dir, 'calibre-launcher.dll')
ver = '.'.join(__version__.split('.')[:2]) ver = '.'.join(__version__.split('.')[:2])
if self.newer(dll, objects): if self.newer(dll, objects):
cmd = [msvc.linker, '/DLL', '/INCREMENTAL:NO', '/VERSION:'+ver, cmd = [msvc.linker, '/DLL', '/VERSION:'+ver, '/OUT:'+dll,
'/OUT:'+dll, '/nologo', '/MACHINE:X86'] + objects + \ '/nologo', '/MACHINE:'+machine] + dlflags + objects + \
[self.embed_resources(dll), [self.embed_resources(dll),
'/LIBPATH:C:/Python%s/libs'%self.py_ver, '/LIBPATH:C:/Python%s/libs'%self.py_ver,
'python%s.lib'%self.py_ver, 'python%s.lib'%self.py_ver,
'/delayload:python%s.dll'%self.py_ver] '/delayload:python%s.dll'%self.py_ver]
self.info('Linking calibre-launcher.dll') self.info('Linking calibre-launcher.dll')
self.run_builder(cmd) self.run_builder(cmd, show_output=True)
src = self.j(base, 'main.c') src = self.j(base, 'main.c')
shutil.copy2(dll, self.base) shutil.copy2(dll, self.base)
@ -541,16 +565,16 @@ class Win32Freeze(Command, WixMixIn):
dest = self.j(self.obj_dir, bname+'.obj') dest = self.j(self.obj_dir, bname+'.obj')
if self.newer(dest, [src]+headers): if self.newer(dest, [src]+headers):
self.info('Compiling', bname) self.info('Compiling', bname)
cmd = [msvc.cc] + xflags + ['/Tc'+src, '/Fo'+dest] cmd = [msvc.cc] + xflags + dflags + ['/Tc'+src, '/Fo'+dest]
self.run_builder(cmd) self.run_builder(cmd)
exe = self.j(self.base, bname+'.exe') exe = self.j(self.base, bname+'.exe')
lib = dll.replace('.dll', '.lib') lib = dll.replace('.dll', '.lib')
if self.newer(exe, [dest, lib, self.rc_template, __file__]): if self.newer(exe, [dest, lib, self.rc_template, __file__]):
self.info('Linking', bname) self.info('Linking', bname)
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86', cmd = [msvc.linker] + ['/MACHINE:'+machine,
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:'+subsys, '/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:'+subsys,
'/LIBPATH:C:/Python%s/libs'%self.py_ver, '/RELEASE', '/LIBPATH:C:/Python%s/libs'%self.py_ver, '/RELEASE',
'/OUT:'+exe, self.embed_resources(exe), '/OUT:'+exe] + dlflags + [self.embed_resources(exe),
dest, lib] dest, lib]
self.run_builder(cmd) self.run_builder(cmd)
@ -563,12 +587,18 @@ class Win32Freeze(Command, WixMixIn):
for x in (self.plugins_dir, self.dll_dir): for x in (self.plugins_dir, self.dll_dir):
for pyd in os.listdir(x): for pyd in os.listdir(x):
if pyd.endswith('.pyd') and pyd not in { if pyd.endswith('.pyd') and pyd not in {
'unrar.pyd', 'sqlite_custom.pyd', 'calibre_style.pyd'}:
# sqlite_custom has to be a file for # sqlite_custom has to be a file for
# sqlite_load_extension to work # sqlite_load_extension to work
# For some reason unrar.pyd crashes when processing 'sqlite_custom.pyd',
# password protected RAR files if loaded from inside # calibre_style has to be loaded by Qt therefore it
# pylib.zip # must be a file
'calibre_style.pyd',
# Because of https://github.com/fancycode/MemoryModule/issues/4
# any extensions that use C++ exceptions must be loaded
# from files
'unrar.pyd', 'wpd.pyd', 'podofo.pyd',
'progress_indicator.pyd',
}:
self.add_to_zipfile(zf, pyd, x) self.add_to_zipfile(zf, pyd, x)
os.remove(self.j(x, pyd)) os.remove(self.j(x, pyd))
@ -581,6 +611,7 @@ class Win32Freeze(Command, WixMixIn):
sp = self.j(self.lib_dir, 'site-packages') sp = self.j(self.lib_dir, 'site-packages')
# Special handling for PIL and pywin32 # Special handling for PIL and pywin32
handled = set(['PIL.pth', 'pywin32.pth', 'PIL', 'win32']) handled = set(['PIL.pth', 'pywin32.pth', 'PIL', 'win32'])
if not is64bit:
self.add_to_zipfile(zf, 'PIL', sp) self.add_to_zipfile(zf, 'PIL', sp)
base = self.j(sp, 'win32', 'lib') base = self.j(sp, 'win32', 'lib')
for x in os.listdir(base): for x in os.listdir(base):
@ -593,16 +624,17 @@ class Win32Freeze(Command, WixMixIn):
self.add_to_zipfile(zf, x, base) self.add_to_zipfile(zf, x, base)
handled.add('easy-install.pth') handled.add('easy-install.pth')
# We dont want the site.py from site-packages
handled.add('site.pyo')
for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')): for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
handled.add(self.b(d)) handled.add(self.b(d))
for x in os.listdir(d): for x in os.listdir(d):
if x == 'EGG-INFO': if x in {'EGG-INFO', 'site.py', 'site.pyc', 'site.pyo'}:
continue continue
self.add_to_zipfile(zf, x, d) self.add_to_zipfile(zf, x, d)
# The rest of site-packages # The rest of site-packages
# We don't want the site.py from site-packages
handled.add('site.pyo')
for x in os.listdir(sp): for x in os.listdir(sp):
if x in handled or x.endswith('.egg-info'): if x in handled or x.endswith('.egg-info'):
continue continue
@ -622,8 +654,10 @@ class Win32Freeze(Command, WixMixIn):
line = line.strip() line = line.strip()
if not line or line.startswith('#') or line.startswith('import'): if not line or line.startswith('#') or line.startswith('import'):
continue continue
candidate = self.j(base, line) candidate = os.path.abspath(self.j(base, line))
if os.path.exists(candidate): if os.path.exists(candidate):
if not os.path.isdir(candidate):
raise ValueError('%s is not a directory'%candidate)
yield candidate yield candidate
def add_to_zipfile(self, zf, name, base, exclude=frozenset()): def add_to_zipfile(self, zf, name, base, exclude=frozenset()):

View File

@ -109,10 +109,8 @@ of mimetypes from the windows registry
Python packages Python packages
------------------ ------------------
Install setuptools from http://pypi.python.org/pypi/setuptools If there are no Install setuptools from http://pypi.python.org/pypi/setuptools. Use the source
windows binaries already compiled for the version of python you are using then tarball. Edit setup.py and set zip_safe=False. Then run::
download the source and run the following command in the folder where the
source has been unpacked::
python setup.py install python setup.py install

View File

@ -418,9 +418,12 @@ static BOOL move_program() {
} }
if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) { if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) {
show_last_error(L"Failed to move calibre program folder"); Sleep(4000); // Sleep and try again
if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) {
show_last_error(L"Failed to move calibre program folder. This is usually caused by an antivirus program or a file sync program like DropBox. Turn them off temporarily and try again. Underlying error: ");
return false; return false;
} }
}
if (!directory_exists(L"..\\Calibre Library")) { if (!directory_exists(L"..\\Calibre Library")) {
MoveFileEx(L"Calibre Portable\\Calibre Library", L"..\\Calibre Library", 0); MoveFileEx(L"Calibre Portable\\Calibre Library", L"..\\Calibre Library", 0);

View File

@ -16,6 +16,7 @@ static char python_dll[] = PYDLL;
void set_gui_app(char yes) { GUI_APP = yes; } void set_gui_app(char yes) { GUI_APP = yes; }
char is_gui_app() { return GUI_APP; } char is_gui_app() { return GUI_APP; }
int calibre_show_python_error(const wchar_t *preamble, int code);
// memimporter {{{ // memimporter {{{
@ -63,17 +64,6 @@ static void* FindLibrary(char *name, PyObject *callback)
return p; return p;
} }
static PyObject *set_find_proc(PyObject *self, PyObject *args)
{
PyObject *callback = NULL;
if (!PyArg_ParseTuple(args, "|O:set_find_proc", &callback))
return NULL;
Py_DECREF((PyObject *)findproc_data);
Py_INCREF(callback);
findproc_data = (void *)callback;
return Py_BuildValue("i", 1);
}
static PyObject * static PyObject *
import_module(PyObject *self, PyObject *args) import_module(PyObject *self, PyObject *args)
{ {
@ -92,7 +82,7 @@ import_module(PyObject *self, PyObject *args)
&data, &size, &data, &size,
&initfuncname, &modname, &pathname)) &initfuncname, &modname, &pathname))
return NULL; return NULL;
hmem = MemoryLoadLibrary(pathname, data); hmem = MemoryLoadLibrary(data);
if (!hmem) { if (!hmem) {
PyErr_Format(*DLL_ImportError, PyErr_Format(*DLL_ImportError,
"MemoryLoadLibrary() failed loading %s", pathname); "MemoryLoadLibrary() failed loading %s", pathname);
@ -119,14 +109,14 @@ import_module(PyObject *self, PyObject *args)
static PyMethodDef methods[] = { static PyMethodDef methods[] = {
{ "import_module", import_module, METH_VARARGS, { "import_module", import_module, METH_VARARGS,
"import_module(code, initfunc, dllname[, finder]) -> module" }, "import_module(code, initfunc, dllname[, finder]) -> module" },
{ "set_find_proc", set_find_proc, METH_VARARGS },
{ NULL, NULL }, /* Sentinel */ { NULL, NULL }, /* Sentinel */
}; };
// }}} // }}}
static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int code) { static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int code) {
wchar_t *buf, *cbuf; wchar_t *buf;
char *cbuf;
buf = (wchar_t*)LocalAlloc(LMEM_ZEROINIT, sizeof(wchar_t)* buf = (wchar_t*)LocalAlloc(LMEM_ZEROINIT, sizeof(wchar_t)*
(wcslen(msg) + wcslen(preamble) + 80)); (wcslen(msg) + wcslen(preamble) + 80));
@ -142,7 +132,7 @@ static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int co
else { else {
cbuf = (char*) calloc(10+(wcslen(buf)*4), sizeof(char)); cbuf = (char*) calloc(10+(wcslen(buf)*4), sizeof(char));
if (cbuf) { if (cbuf) {
if (WideCharToMultiByte(CP_UTF8, 0, buf, -1, cbuf, 10+(wcslen(buf)*4), NULL, NULL) != 0) printf_s(cbuf); if (WideCharToMultiByte(CP_UTF8, 0, buf, -1, cbuf, (int)(10+(wcslen(buf)*4)), NULL, NULL) != 0) printf_s(cbuf);
free(cbuf); free(cbuf);
} }
} }
@ -165,6 +155,7 @@ int show_last_error_crt(wchar_t *preamble) {
int show_last_error(wchar_t *preamble) { int show_last_error(wchar_t *preamble) {
wchar_t *msg = NULL; wchar_t *msg = NULL;
DWORD dw = GetLastError(); DWORD dw = GetLastError();
int ret;
FormatMessage( FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_ALLOCATE_BUFFER |
@ -173,10 +164,13 @@ int show_last_error(wchar_t *preamble) {
NULL, NULL,
dw, dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
&msg, (LPWSTR)&msg,
0, NULL ); 0,
NULL );
return _show_error(preamble, msg, (int)dw); ret = _show_error(preamble, msg, (int)dw);
if (msg != NULL) LocalFree(msg);
return ret;
} }
char* get_app_dir() { char* get_app_dir() {
@ -254,10 +248,10 @@ void setup_stream(const char *name, const char *errors, UINT cp) {
else if (cp == CP_UTF7) _snprintf_s(buf, 100, _TRUNCATE, "%s", "utf-7"); else if (cp == CP_UTF7) _snprintf_s(buf, 100, _TRUNCATE, "%s", "utf-7");
else _snprintf_s(buf, 100, _TRUNCATE, "cp%d", cp); else _snprintf_s(buf, 100, _TRUNCATE, "cp%d", cp);
stream = PySys_GetObject(name); stream = PySys_GetObject((char*)name);
if (!PyFile_SetEncodingAndErrors(stream, buf, errors)) if (!PyFile_SetEncodingAndErrors(stream, buf, (char*)errors))
ExitProcess(calibre_show_python_error("Failed to set stream encoding", 1)); ExitProcess(calibre_show_python_error(L"Failed to set stream encoding", 1));
free(buf); free(buf);
@ -372,7 +366,6 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
} }
PySys_SetObject("argv", argv); PySys_SetObject("argv", argv);
findproc = FindLibrary;
Py_InitModule3("_memimporter", methods, module_doc); Py_InitModule3("_memimporter", methods, module_doc);
} }

View File

@ -2,7 +2,7 @@
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi' xmlns:util="http://schemas.microsoft.com/wix/UtilExtension" <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi' xmlns:util="http://schemas.microsoft.com/wix/UtilExtension"
> >
<Product Name='{app}' Id='*' UpgradeCode='{upgrade_code}' <Product Name='{app}{x64}' Id='*' UpgradeCode='{upgrade_code}'
Language='1033' Codepage='1252' Version='{version}' Manufacturer='Kovid Goyal'> Language='1033' Codepage='1252' Version='{version}' Manufacturer='Kovid Goyal'>
<Package Id='*' Keywords='Installer' Description="{app} Installer" <Package Id='*' Keywords='Installer' Description="{app} Installer"
@ -29,19 +29,24 @@
Language="1033" Language="1033"
Property="NEWPRODUCTFOUND"/> Property="NEWPRODUCTFOUND"/>
</Upgrade> </Upgrade>
<CustomAction Id="PreventDowngrading" Error="Newer version already installed."/> <CustomAction Id="PreventDowngrading" Error="Newer version of {app} already installed. If you want to downgrade you must uninstall {app} first."/>
<Property Id="APPLICATIONFOLDER"> <Property Id="APPLICATIONFOLDER">
<RegistrySearch Id='calibreInstDir' Type='raw' <RegistrySearch Id='calibreInstDir' Type='raw'
Root='HKLM' Key="Software\{app}\Installer" Name="InstallPath" /> Root='HKLM' Key="Software\{app}{x64}\Installer" Name="InstallPath" />
</Property> </Property>
<Directory Id='TARGETDIR' Name='SourceDir'> <Directory Id='TARGETDIR' Name='SourceDir'>
<Directory Id='ProgramFilesFolder' Name='PFiles'> <Directory Id='{ProgramFilesFolder}' Name='PFiles'>
<Directory Id='APPLICATIONFOLDER' Name='{app}' /> <!-- The name must be calibre on 32 bit to ensure
that the component guids don't change compared
to previous msis. However, on 64 bit it must
be Calibre2 otherwise by default it will
install to C:\Program Files\calibre -->
<Directory Id='APPLICATIONFOLDER' Name="{appfolder}" />
</Directory> </Directory>
<Directory Id="ProgramMenuFolder"> <Directory Id="ProgramMenuFolder">
<Directory Id="ApplicationProgramsFolder" Name="{app} - E-book Management"/> <Directory Id="ApplicationProgramsFolder" Name="{app}{x64} - E-book Management"/>
</Directory> </Directory>
<Directory Id="DesktopFolder" Name="Desktop"/> <Directory Id="DesktopFolder" Name="Desktop"/>
</Directory> </Directory>
@ -50,24 +55,24 @@
{app_components} {app_components}
<Component Id="AddToPath" Guid="*"> <Component Id="AddToPath" Guid="*">
<Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' /> <Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' />
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/> <RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
</Component> </Component>
<Component Id="RememberInstallDir" Guid="*"> <Component Id="RememberInstallDir" Guid="*">
<RegistryValue Root="HKLM" Key="Software\{app}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/> <RegistryValue Root="HKLM" Key="Software\{app}{x64}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
</Component> </Component>
</DirectoryRef> </DirectoryRef>
<DirectoryRef Id="ApplicationProgramsFolder"> <DirectoryRef Id="ApplicationProgramsFolder">
<Component Id="StartMenuShortcuts" Guid="*"> <Component Id="StartMenuShortcuts" Guid="*">
<Shortcut Id="s1" Name="{app} - E-book management" <Shortcut Id="s1" Name="{app}{x64} - E-book management"
Description="Manage your e-book collection and download news" Description="Manage your e-book collection and download news"
Target="[#{exe_map[calibre]}]" Target="[#{exe_map[calibre]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" /> WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<Shortcut Id="s2" Name="E-book viewer" <Shortcut Id="s2" Name="E-book viewer{x64}"
Description="Viewer for all the major e-book formats" Description="Viewer for all the major e-book formats"
Target="[#{exe_map[ebook-viewer]}]" Target="[#{exe_map[ebook-viewer]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" /> WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<Shortcut Id="s3" Name="LRF viewer" <Shortcut Id="s3" Name="LRF viewer{x64}"
Description="Viewer for LRF format e-books" Description="Viewer for LRF format e-books"
Target="[#{exe_map[lrfviewer]}]" Target="[#{exe_map[lrfviewer]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" /> WorkingDirectory="APPLICATIONROOTDIRECTORY" />
@ -79,17 +84,17 @@
Target="http://calibre-ebook.com/get-involved"/> Target="http://calibre-ebook.com/get-involved"/>
<RemoveFolder Id="ApplicationProgramsFolder" On="uninstall"/> <RemoveFolder Id="ApplicationProgramsFolder" On="uninstall"/>
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="start_menu_shortcuts_installed" Type="integer" Value="1" KeyPath="yes"/> <RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="start_menu_shortcuts_installed" Type="integer" Value="1" KeyPath="yes"/>
</Component> </Component>
</DirectoryRef> </DirectoryRef>
<DirectoryRef Id="DesktopFolder"> <DirectoryRef Id="DesktopFolder">
<Component Id="DesktopShortcut" Guid="*"> <Component Id="DesktopShortcut" Guid="*">
<Shortcut Id="ds1" Name="{app} - E-book management" <Shortcut Id="ds1" Name="{app}{x64} - E-book management"
Description="Manage your e-book collection and download news" Description="Manage your e-book collection and download news"
Target="[#{exe_map[calibre]}]" Target="[#{exe_map[calibre]}]"
WorkingDirectory="APPLICATIONROOTDIRECTORY" /> WorkingDirectory="APPLICATIONROOTDIRECTORY" />
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="desktop_shortcut_installed" Type="integer" Value="1" KeyPath="yes"/> <RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="desktop_shortcut_installed" Type="integer" Value="1" KeyPath="yes"/>
</Component> </Component>
</DirectoryRef> </DirectoryRef>
@ -122,17 +127,35 @@
<!-- Add icon to entry in Add/Remove programs --> <!-- Add icon to entry in Add/Remove programs -->
<Icon Id="main_icon" SourceFile="{main_icon}"/> <Icon Id="main_icon" SourceFile="{main_icon}"/>
<Property Id="ARPPRODUCTICON" Value="main_icon" /> <Property Id="ARPPRODUCTICON" Value="main_icon" />
<Property Id="ARPURLINFOABOUT" Value="http://calibre-ebook.com" />
<Property Id='ARPHELPLINK' Value="http://calibre-ebook.com/help" />
<Property Id='ARPURLUPDATEINFO' Value="http://calibre-ebook.com/download_windows" />
<SetProperty Id="ARPINSTALLLOCATION" Value="[APPLICATIONFOLDER]" After="CostFinalize" />
<Condition <Condition
Message="This application is only supported on Windows XP SP3, or higher."> Message="This application is only supported on {minverhuman}, or higher.">
<![CDATA[Installed OR (VersionNT >= 501)]]> <![CDATA[Installed OR (VersionNT >= {minver})]]>
</Condition> </Condition>
<!-- On 64 bit installers there is a bug in WiX that causes the
WixSetDefaultPerMachineFolder action to incorrectly set
APPLICATIONFOLDER to the x86 value, so we override it. See
http://stackoverflow.com/questions/5479790/wix-how-to-override-c-program-files-x86-on-x64-machine-in-wixui-advanced-s
-->
<CustomAction
Id="OverwriteWixSetDefaultPerMachineFolder"
Property="WixPerMachineFolder"
Value="[APPLICATIONFOLDER]"
Execute="immediate"
/>
<InstallExecuteSequence> <InstallExecuteSequence>
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom> <Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
{fix_wix}
<RemoveExistingProducts After="InstallFinalize" /> <RemoveExistingProducts After="InstallFinalize" />
</InstallExecuteSequence> </InstallExecuteSequence>
<InstallUISequence> <InstallUISequence>
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom> <Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
{fix_wix}
</InstallUISequence> </InstallUISequence>
<UI> <UI>

View File

@ -6,11 +6,20 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, shutil, subprocess import os, shutil, subprocess, sys
from setup import __appname__, __version__, basenames from setup import __appname__, __version__, basenames
from setup.build_environment import is64bit
if is64bit:
WIXP = r'C:\Program Files (x86)\WiX Toolset v3.6'
UPGRADE_CODE = '5DD881FF-756B-4097-9D82-8C0F11D521EA'
MINVERHUMAN = 'Windows Vista'
else:
WIXP = r'C:\Program Files\WiX Toolset v3.6'
UPGRADE_CODE = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1'
MINVERHUMAN = 'Windows XP SP3'
WIXP = r'C:\Program Files\Windows Installer XML v3.5'
CANDLE = WIXP+r'\bin\candle.exe' CANDLE = WIXP+r'\bin\candle.exe'
LIGHT = WIXP+r'\bin\light.exe' LIGHT = WIXP+r'\bin\light.exe'
@ -28,8 +37,14 @@ class WixMixIn:
components = self.get_components_from_files() components = self.get_components_from_files()
wxs = template.format( wxs = template.format(
app = __appname__, app = __appname__,
appfolder = 'Calibre2' if is64bit else __appname__,
version = __version__, version = __version__,
upgrade_code = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1', upgrade_code = UPGRADE_CODE,
ProgramFilesFolder = 'ProgramFiles64Folder' if is64bit else 'ProgramFilesFolder',
x64 = ' 64bit' if is64bit else '',
minverhuman = MINVERHUMAN,
minver = '600' if is64bit else '501',
fix_wix = '<Custom Action="OverwriteWixSetDefaultPerMachineFolder" After="WixSetDefaultPerMachineFolder" />' if is64bit else '',
compression = self.opts.msi_compression, compression = self.opts.msi_compression,
app_components = components, app_components = components,
exe_map = self.smap, exe_map = self.smap,
@ -48,14 +63,15 @@ class WixMixIn:
with open(enusf, 'wb') as f: with open(enusf, 'wb') as f:
f.write(enus) f.write(enus)
wixobj = self.j(self.installer_dir, __appname__+'.wixobj') wixobj = self.j(self.installer_dir, __appname__+'.wixobj')
cmd = [CANDLE, '-nologo', '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf] arch = 'x64' if is64bit else 'x86'
cmd = [CANDLE, '-nologo', '-arch', arch, '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf]
self.info(*cmd) self.info(*cmd)
subprocess.check_call(cmd) self.run_wix(cmd)
self.installer = self.j(self.src_root, 'dist') self.installer = self.j(self.src_root, 'dist')
if not os.path.exists(self.installer): if not os.path.exists(self.installer):
os.makedirs(self.installer) os.makedirs(self.installer)
self.installer = self.j(self.installer, '%s-%s.msi' % (__appname__, self.installer = self.j(self.installer, '%s%s-%s.msi' % (__appname__,
__version__)) ('-64bit' if is64bit else ''), __version__))
license = self.j(self.src_root, 'LICENSE.rtf') license = self.j(self.src_root, 'LICENSE.rtf')
banner = self.j(self.src_root, 'icons', 'wix-banner.bmp') banner = self.j(self.src_root, 'icons', 'wix-banner.bmp')
dialog = self.j(self.src_root, 'icons', 'wix-dialog.bmp') dialog = self.j(self.src_root, 'icons', 'wix-dialog.bmp')
@ -66,13 +82,27 @@ class WixMixIn:
'-dWixUILicenseRtf='+license, '-dWixUILicenseRtf='+license,
'-dWixUIBannerBmp='+banner, '-dWixUIBannerBmp='+banner,
'-dWixUIDialogBmp='+dialog] '-dWixUIDialogBmp='+dialog]
cmd.append('-sice:ICE60') # No language in dlls warning cmd.extend([
'-sice:ICE60',# No language in dlls warning
'-sice:ICE61',# Allow upgrading with same version number
'-sice:ICE40', # Re-install mode overriden
'-sice:ICE69', # Shortcut components are part of a different feature than the files they point to
])
if self.opts.no_ice: if self.opts.no_ice:
cmd.append('-sval') cmd.append('-sval')
if self.opts.verbose: if self.opts.verbose:
cmd.append('-v') cmd.append('-v')
self.info(*cmd) self.info(*cmd)
subprocess.check_call(cmd) self.run_wix(cmd)
def run_wix(self, cmd):
    '''
    Run a WiX tool, echo its output and abort the build if it fails.

    :param cmd: The command to run, as an argument list suitable for
                subprocess.Popen.

    Whatever the tool wrote to stdout and stderr is passed to self.info()
    (stdout first), regardless of the exit status. If the exit status is
    non-zero, sys.exit(1) is called afterwards.
    '''
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the process.
    # Calling wait() first and read() afterwards deadlocks as soon as the
    # child writes more than the OS pipe buffer can hold.
    stdout, stderr = p.communicate()
    self.info(stdout)
    self.info(stderr)
    if p.returncode != 0:
        sys.exit(1)
def get_components_from_files(self): def get_components_from_files(self):
@ -103,7 +133,20 @@ class WixMixIn:
(fid, f, x, checksum), (fid, f, x, checksum),
'</Component>' '</Component>'
] ]
components.append(''.join(c)) if x.endswith('.exe') and not x.startswith('pdf'):
# Add the executable to app paths so that users can
# launch it from the run dialog even if it is not on
# the path. See http://msdn.microsoft.com/en-us/library/windows/desktop/ee872121(v=vs.85).aspx
c[-1:-1] = [
('<RegistryValue Root="HKLM" '
r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
r'Paths\%s" Value="[#file_%d]" Type="string" />'%(x, fid)),
('<RegistryValue Root="HKLM" '
r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
r'Paths\{0}" Name="Path" Value="[APPLICATIONFOLDER]" '
'Type="string" />'.format(x)),
]
components.append('\n'.join(c))
return components return components
components = process_dir(os.path.abspath(self.base)) components = process_dir(os.path.abspath(self.base))
@ -114,4 +157,3 @@ class WixMixIn:
return '\t\t\t\t'+'\n\t\t\t\t'.join(components) return '\t\t\t\t'+'\n\t\t\t\t'.join(components)

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,7 @@ STAGING_DIR = '/root/staging'
def installers(): def installers():
installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2'))) installers = list(map(installer_name, ('dmg', 'msi', 'tar.bz2')))
installers.append(installer_name('tar.bz2', is64bit=True)) installers.append(installer_name('tar.bz2', is64bit=True))
installers.append(installer_name('msi', is64bit=True))
installers.insert(0, 'dist/%s-%s.tar.xz'%(__appname__, __version__)) installers.insert(0, 'dist/%s-%s.tar.xz'%(__appname__, __version__))
installers.append('dist/%s-portable-installer-%s.exe'%(__appname__, __version__)) installers.append('dist/%s-portable-installer-%s.exe'%(__appname__, __version__))
return installers return installers
@ -40,7 +41,7 @@ def installer_description(fname):
bits = '32' if 'i686' in fname else '64' bits = '32' if 'i686' in fname else '64'
return bits + 'bit Linux binary' return bits + 'bit Linux binary'
if fname.endswith('.msi'): if fname.endswith('.msi'):
return 'Windows installer' return 'Windows %sinstaller'%('64bit ' if '64bit' in fname else '')
if fname.endswith('.dmg'): if fname.endswith('.dmg'):
return 'OS X dmg' return 'OS X dmg'
if fname.endswith('.exe'): if fname.endswith('.exe'):

View File

@ -28,7 +28,7 @@ isunix = isosx or islinux
isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None
ispy3 = sys.version_info.major > 2 ispy3 = sys.version_info.major > 2
isxp = iswindows and sys.getwindowsversion().major < 6 isxp = iswindows and sys.getwindowsversion().major < 6
is64bit = sys.maxint > (1 << 32) is64bit = sys.maxsize > (1 << 32)
isworker = os.environ.has_key('CALIBRE_WORKER') or os.environ.has_key('CALIBRE_SIMPLE_WORKER') isworker = os.environ.has_key('CALIBRE_WORKER') or os.environ.has_key('CALIBRE_SIMPLE_WORKER')
if isworker: if isworker:
os.environ.pop('CALIBRE_FORCE_ANSI', None) os.environ.pop('CALIBRE_FORCE_ANSI', None)

View File

@ -148,10 +148,10 @@ def print_basic_debug_info(out=None):
out = functools.partial(prints, file=out) out = functools.partial(prints, file=out)
import platform import platform
from calibre.constants import (__appname__, get_version, isportable, isosx, from calibre.constants import (__appname__, get_version, isportable, isosx,
isfrozen) isfrozen, is64bit)
out(__appname__, get_version(), 'Portable' if isportable else '', out(__appname__, get_version(), 'Portable' if isportable else '',
'isfrozen:', isfrozen) 'isfrozen:', isfrozen, 'is64bit:', is64bit)
out(platform.platform(), platform.system()) out(platform.platform(), platform.system(), platform.architecture())
out(platform.system_alias(platform.system(), platform.release(), out(platform.system_alias(platform.system(), platform.release(),
platform.version())) platform.version()))
out('Python', platform.python_version()) out('Python', platform.python_version())

View File

@ -182,7 +182,7 @@ def debug(ioreg_to_tmp=False, buf=None, plugins=None,
out(ioreg) out(ioreg)
if hasattr(buf, 'getvalue'): if hasattr(buf, 'getvalue'):
return buf.getvalue().decode('utf-8') return buf.getvalue().decode('utf-8', 'replace')
finally: finally:
sys.stdout = oldo sys.stdout = oldo
sys.stderr = olde sys.stderr = olde

View File

@ -232,7 +232,7 @@ class ANDROID(USBMS):
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE', 'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID', 'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E', 'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
'NOVO7', 'MB526', '_USB#WYK7MSF8KE'] 'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
@ -243,7 +243,7 @@ class ANDROID(USBMS):
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875', 'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727', 'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E', 'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
'NOVO7', 'ADVANCED'] 'NOVO7', 'ADVANCED', 'TABLET_PC']
OSX_MAIN_MEM = 'Android Device Main Memory' OSX_MAIN_MEM = 'Android Device Main Memory'

View File

@ -20,6 +20,7 @@ from calibre.utils.config import config_dir, dynamic, prefs
from calibre.utils.date import now, parse_date from calibre.utils.date import now, parse_date
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None): def strftime(fmt='%Y/%m/%d %H:%M:%S', dt=None):
if not hasattr(dt, 'timetuple'): if not hasattr(dt, 'timetuple'):
@ -38,6 +39,7 @@ def logger():
_log = ThreadSafeLog() _log = ThreadSafeLog()
return _log return _log
class AppleOpenFeedback(OpenFeedback): class AppleOpenFeedback(OpenFeedback):
def __init__(self, plugin): def __init__(self, plugin):
@ -102,6 +104,7 @@ class AppleOpenFeedback(OpenFeedback):
return Dialog(parent, self) return Dialog(parent, self)
class DriverBase(DeviceConfig, DevicePlugin): class DriverBase(DeviceConfig, DevicePlugin):
# Needed for config_widget to work # Needed for config_widget to work
FORMATS = ['epub', 'pdf'] FORMATS = ['epub', 'pdf']
@ -133,11 +136,11 @@ class DriverBase(DeviceConfig, DevicePlugin):
False, False,
] ]
@classmethod @classmethod
def _config_base_name(cls): def _config_base_name(cls):
return 'iTunes' return 'iTunes'
class ITUNES(DriverBase): class ITUNES(DriverBase):
''' '''
Calling sequences: Calling sequences:
@ -148,6 +151,8 @@ class ITUNES(DriverBase):
open() open()
card_prefix() card_prefix()
can_handle() can_handle()
_launch_iTunes()
_discover_manual_sync_mode()
set_progress_reporter() set_progress_reporter()
get_device_information() get_device_information()
card_prefix() card_prefix()
@ -156,6 +161,7 @@ class ITUNES(DriverBase):
can_handle() can_handle()
set_progress_reporter() set_progress_reporter()
books() (once for each storage point) books() (once for each storage point)
(create self.cached_books)
settings() settings()
settings() settings()
can_handle() (~1x per second OSX while idle) can_handle() (~1x per second OSX while idle)
@ -186,14 +192,14 @@ class ITUNES(DriverBase):
free_space() free_space()
''' '''
name = 'Apple device interface' name = 'Apple iTunes interface'
gui_name = _('Apple device') gui_name = _('Apple device')
icon = I('devices/ipad.png') icon = I('devices/ipad.png')
description = _('Communicate with iTunes/iBooks.') description = _('Communicate with iTunes/iBooks.')
supported_platforms = ['osx', 'windows'] supported_platforms = ['osx', 'windows']
author = 'GRiker' author = 'GRiker'
#: The version of this plugin as a 3-tuple (major, minor, revision) #: The version of this plugin as a 3-tuple (major, minor, revision)
version = (1,1,0) version = (1, 1, 1)
DISPLAY_DISABLE_DIALOG = "display_disable_apple_driver_dialog" DISPLAY_DISABLE_DIALOG = "display_disable_apple_driver_dialog"
@ -203,7 +209,7 @@ class ITUNES(DriverBase):
USE_ITUNES_STORAGE = 2 USE_ITUNES_STORAGE = 2
OPEN_FEEDBACK_MESSAGE = _( OPEN_FEEDBACK_MESSAGE = _(
'Apple device detected, launching iTunes, please wait ...') 'Apple iDevice detected, launching iTunes, please wait ...')
BACKLOADING_ERROR_MESSAGE = _( BACKLOADING_ERROR_MESSAGE = _(
"Cannot copy books directly from iDevice. " "Cannot copy books directly from iDevice. "
"Drag from iTunes Library to desktop, then add to calibre's Library window.") "Drag from iTunes Library to desktop, then add to calibre's Library window.")
@ -218,22 +224,9 @@ class ITUNES(DriverBase):
'for more information.</p>' 'for more information.</p>'
'<p></p>') '<p></p>')
# Product IDs: VENDOR_ID = []
# 0x1291 iPod Touch PRODUCT_ID = []
# 0x1293 iPod Touch 2G BCD = []
# 0x1299 iPod Touch 3G
# 0x1292 iPhone 3G
# 0x1294 iPhone 3GS
# 0x1297 iPhone 4
# 0x129a iPad
# 0x129f iPad2 (WiFi)
# 0x12a0 iPhone 4S (GSM)
# 0x12a2 iPad2 (GSM)
# 0x12a3 iPad2 (CDMA)
# 0x12a6 iPad3 (GSM)
VENDOR_ID = [0x05ac]
PRODUCT_ID = [0x1292,0x1293,0x1294,0x1297,0x1299,0x129a,0x129f,0x12a2,0x12a3,0x12a6]
BCD = [0x01]
# Plugboard ID # Plugboard ID
DEVICE_PLUGBOARD_NAME = 'APPLE' DEVICE_PLUGBOARD_NAME = 'APPLE'
@ -329,7 +322,7 @@ class ITUNES(DriverBase):
L{books}(oncard='cardb')). L{books}(oncard='cardb')).
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES.add_books_to_metadata()") logger().info("%s.add_books_to_metadata()" % self.__class__.__name__)
task_count = float(len(self.update_list)) task_count = float(len(self.update_list))
@ -414,13 +407,13 @@ class ITUNES(DriverBase):
""" """
if not oncard: if not oncard:
if DEBUG: if DEBUG:
logger().info("ITUNES:books():") logger().info("%s.books():" % self.__class__.__name__)
if self.settings().extra_customization[self.CACHE_COVERS]: if self.settings().extra_customization[self.CACHE_COVERS]:
logger().info(" Cover fetching/caching enabled") logger().info(" Cover fetching/caching enabled")
else: else:
logger().info(" Cover fetching/caching disabled") logger().info(" Cover fetching/caching disabled")
# Fetch a list of books from iPod device connected to iTunes # Fetch a list of books from iDevice connected to iTunes
if 'iPod' in self.sources: if 'iPod' in self.sources:
booklist = BookList(logger()) booklist = BookList(logger())
cached_books = {} cached_books = {}
@ -451,7 +444,8 @@ class ITUNES(DriverBase):
cached_books[this_book.path] = { cached_books[this_book.path] = {
'title': book.name(), 'title': book.name(),
'author':book.artist().split(' & '), 'author': book.artist(),
'authors': book.artist().split(' & '),
'lib_book': library_books[this_book.path] if this_book.path in library_books else None, 'lib_book': library_books[this_book.path] if this_book.path in library_books else None,
'dev_book': book, 'dev_book': book,
'uuid': book.composer() 'uuid': book.composer()
@ -491,7 +485,8 @@ class ITUNES(DriverBase):
cached_books[this_book.path] = { cached_books[this_book.path] = {
'title': book.Name, 'title': book.Name,
'author':book.Artist.split(' & '), 'author': book.Artist,
'authors': book.Artist.split(' & '),
'lib_book': library_books[this_book.path] if this_book.path in library_books else None, 'lib_book': library_books[this_book.path] if this_book.path in library_books else None,
'uuid': book.Composer, 'uuid': book.Composer,
'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub' 'format': 'pdf' if book.KindAsString.startswith('PDF') else 'epub'
@ -556,7 +551,7 @@ class ITUNES(DriverBase):
# We need to know if iTunes sees the iPad # We need to know if iTunes sees the iPad
# It may have been ejected # It may have been ejected
if DEBUG: if DEBUG:
logger().info("ITUNES.can_handle()") logger().info("%s.can_handle()" % self.__class__.__name__)
self._launch_iTunes() self._launch_iTunes()
self.sources = self._get_sources() self.sources = self._get_sources()
@ -567,12 +562,12 @@ class ITUNES(DriverBase):
self.sources = self._get_sources() self.sources = self._get_sources()
if (not 'iPod' in self.sources) or (self.sources['iPod'] == ''): if (not 'iPod' in self.sources) or (self.sources['iPod'] == ''):
attempts -= 1 attempts -= 1
time.sleep(0.5) time.sleep(1.0)
if DEBUG: if DEBUG:
logger().warning(" waiting for connected iPad, attempt #%d" % (10 - attempts)) logger().warning(" waiting for connected iDevice, attempt #%d" % (10 - attempts))
else: else:
if DEBUG: if DEBUG:
logger().info(' found connected iPad') logger().info(' found connected iDevice')
break break
else: else:
# iTunes running, but not connected iPad # iTunes running, but not connected iPad
@ -613,26 +608,26 @@ class ITUNES(DriverBase):
sys.stdout.write('.') sys.stdout.write('.')
sys.stdout.flush() sys.stdout.flush()
if DEBUG: if DEBUG:
logger().info('ITUNES.can_handle_windows:\n confirming connected iPad') logger().info("%s.can_handle_windows:\n confirming connected iPad" % self.__class__.__name__)
self.ejected = False self.ejected = False
self._discover_manual_sync_mode() self._discover_manual_sync_mode()
return True return True
else: else:
if DEBUG: if DEBUG:
logger().info("ITUNES.can_handle_windows():\n device ejected") logger().info("%s.can_handle_windows():\n device ejected" % self.__class__.__name__)
self.ejected = True self.ejected = True
return False return False
except: except:
# iTunes connection failed, probably not running anymore # iTunes connection failed, probably not running anymore
logger().error("ITUNES.can_handle_windows():\n lost connection to iTunes") logger().error("%s.can_handle_windows():\n lost connection to iTunes" % self.__class__.__name__)
return False return False
finally: finally:
pythoncom.CoUninitialize() pythoncom.CoUninitialize()
else: else:
if DEBUG: if DEBUG:
logger().info("ITUNES:can_handle_windows():\n Launching iTunes") logger().info("%s.can_handle_windows():\n Launching iTunes" % self.__class__.__name__)
try: try:
pythoncom.CoInitialize() pythoncom.CoInitialize()
@ -645,9 +640,9 @@ class ITUNES(DriverBase):
self.sources = self._get_sources() self.sources = self._get_sources()
if (not 'iPod' in self.sources) or (self.sources['iPod'] == ''): if (not 'iPod' in self.sources) or (self.sources['iPod'] == ''):
attempts -= 1 attempts -= 1
time.sleep(0.5) time.sleep(1.0)
if DEBUG: if DEBUG:
logger().warning(" waiting for connected iPad, attempt #%d" % (10 - attempts)) logger().warning(" waiting for connected iDevice, attempt #%d" % (10 - attempts))
else: else:
if DEBUG: if DEBUG:
logger().info(' found connected iPad in iTunes') logger().info(' found connected iPad in iTunes')
@ -702,7 +697,7 @@ class ITUNES(DriverBase):
self.problem_msg = _("Some books not found in iTunes database.\n" self.problem_msg = _("Some books not found in iTunes database.\n"
"Delete using the iBooks app.\n" "Delete using the iBooks app.\n"
"Click 'Show Details' for a list.") "Click 'Show Details' for a list.")
logger().info("ITUNES:delete_books()") logger().info("%s.delete_books()" % self.__class__.__name__)
for path in paths: for path in paths:
if self.cached_books[path]['lib_book']: if self.cached_books[path]['lib_book']:
if DEBUG: if DEBUG:
@ -731,8 +726,11 @@ class ITUNES(DriverBase):
else: else:
if self.manual_sync_mode: if self.manual_sync_mode:
metadata = MetaInformation(self.cached_books[path]['title'], metadata = MetaInformation(self.cached_books[path]['title'],
[self.cached_books[path]['author']]) self.cached_books[path]['authors'])
metadata.author = self.cached_books[path]['author']
metadata.uuid = self.cached_books[path]['uuid'] metadata.uuid = self.cached_books[path]['uuid']
if not metadata.uuid:
metadata.uuid = "unknown"
if isosx: if isosx:
self._remove_existing_copy(self.cached_books[path], metadata) self._remove_existing_copy(self.cached_books[path], metadata)
@ -754,7 +752,7 @@ class ITUNES(DriverBase):
are pending GUI jobs that need to communicate with the device. are pending GUI jobs that need to communicate with the device.
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES:eject(): ejecting '%s'" % self.sources['iPod']) logger().info("%s:eject(): ejecting '%s'" % (self.__class__.__name__, self.sources['iPod']))
if isosx: if isosx:
self.iTunes.eject(self.sources['iPod']) self.iTunes.eject(self.sources['iPod'])
elif iswindows: elif iswindows:
@ -785,7 +783,7 @@ class ITUNES(DriverBase):
In Windows, a sync-in-progress blocks this call until sync is complete In Windows, a sync-in-progress blocks this call until sync is complete
""" """
if DEBUG: if DEBUG:
logger().info("ITUNES:free_space()") logger().info("%s.free_space()" % self.__class__.__name__)
free_space = 0 free_space = 0
if isosx: if isosx:
@ -818,7 +816,7 @@ class ITUNES(DriverBase):
@return: (device name, device version, software version on device, mime type) @return: (device name, device version, software version on device, mime type)
""" """
if DEBUG: if DEBUG:
logger().info("ITUNES:get_device_information()") logger().info("%s.get_device_information()" % self.__class__.__name__)
return (self.sources['iPod'], 'hw v1.0', 'sw v1.0', 'mime type normally goes here') return (self.sources['iPod'], 'hw v1.0', 'sw v1.0', 'mime type normally goes here')
@ -828,7 +826,7 @@ class ITUNES(DriverBase):
@param outfile: file object like C{sys.stdout} or the result of an C{open} call @param outfile: file object like C{sys.stdout} or the result of an C{open} call
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES.get_file(): exporting '%s'" % path) logger().info("%s.get_file(): exporting '%s'" % (self.__class__.__name__, path))
try: try:
outfile.write(open(self.cached_books[path]['lib_book'].location().path).read()) outfile.write(open(self.cached_books[path]['lib_book'].location().path).read())
@ -859,7 +857,19 @@ class ITUNES(DriverBase):
raise OpenFeedback(self.ITUNES_SANDBOX_LOCKOUT_MESSAGE) raise OpenFeedback(self.ITUNES_SANDBOX_LOCKOUT_MESSAGE)
if DEBUG: if DEBUG:
logger().info("ITUNES.open(connected_device: %s)" % repr(connected_device)) vendor_id = "0x%x" % connected_device[0]
product_id = "0x%x" % connected_device[1]
bcd = "0x%x" % connected_device[2]
mfg = connected_device[3]
model = connected_device[4]
logger().info("%s.open(MFG: %s, VENDOR_ID: %s, MODEL: %s, BCD: %s, PRODUCT_ID: %s)" %
(self.__class__.__name__,
mfg,
vendor_id,
model,
bcd,
product_id
))
# Display a dialog recommending using 'Connect to iTunes' if user hasn't # Display a dialog recommending using 'Connect to iTunes' if user hasn't
# previously disabled the dialog # previously disabled the dialog
@ -867,7 +877,11 @@ class ITUNES(DriverBase):
raise AppleOpenFeedback(self) raise AppleOpenFeedback(self)
else: else:
if DEBUG: if DEBUG:
logger().warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE) logger().error(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE)
# Log supported DEVICE_IDs and BCDs
logger().info(" BCD: %s" % ['0x%x' % x for x in sorted(self.BCD)])
logger().info(" PRODUCT_ID: %s" % ['0x%x' % x for x in sorted(self.PRODUCT_ID)])
# Confirm/create thumbs archive # Confirm/create thumbs archive
if not os.path.exists(self.cache_dir): if not os.path.exists(self.cache_dir):
@ -908,14 +922,14 @@ class ITUNES(DriverBase):
as uuids are different as uuids are different
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES.remove_books_from_metadata()") logger().info("%s.remove_books_from_metadata()" % self.__class__.__name__)
for path in paths: for path in paths:
if DEBUG: if DEBUG:
self._dump_cached_book(self.cached_books[path], indent=2) self._dump_cached_book(self.cached_books[path], indent=2)
logger().info(" looking for '%s' by '%s' uuid:%s" % logger().info(" looking for '%s' by '%s' uuid:%s" %
(self.cached_books[path]['title'], (self.cached_books[path]['title'],
self.cached_books[path]['author'], self.cached_books[path]['author'],
self.cached_books[path]['uuid'])) repr(self.cached_books[path]['uuid'])))
# Purge the booklist, self.cached_books, thumb cache # Purge the booklist, self.cached_books, thumb cache
for i, bl_book in enumerate(booklists[0]): for i, bl_book in enumerate(booklists[0]):
@ -924,24 +938,28 @@ class ITUNES(DriverBase):
(bl_book.title, bl_book.author, bl_book.uuid)) (bl_book.title, bl_book.author, bl_book.uuid))
found = False found = False
if bl_book.uuid == self.cached_books[path]['uuid']: if bl_book.uuid and bl_book.uuid == self.cached_books[path]['uuid']:
if False: if True:
logger().info(" matched with uuid") logger().info(" --matched uuid")
booklists[0].pop(i) booklists[0].pop(i)
found = True found = True
elif bl_book.title == self.cached_books[path]['title'] and \ elif bl_book.title == self.cached_books[path]['title'] and \
bl_book.author[0] == self.cached_books[path]['author']: bl_book.author == self.cached_books[path]['author']:
if False: if True:
logger().info(" matched with title + author") logger().info(" --matched title + author")
booklists[0].pop(i) booklists[0].pop(i)
found = True found = True
if found: if found:
# Remove from self.cached_books # Remove from self.cached_books
for cb in self.cached_books: for cb in self.cached_books:
if self.cached_books[cb]['uuid'] == self.cached_books[path]['uuid']: if (self.cached_books[cb]['uuid'] == self.cached_books[path]['uuid'] and
self.cached_books[cb]['author'] == self.cached_books[path]['author'] and
self.cached_books[cb]['title'] == self.cached_books[path]['title']):
self.cached_books.pop(cb) self.cached_books.pop(cb)
break break
else:
logger().error(" '%s' not found in self.cached_books" % self.cached_books[path]['title'])
# Remove from thumb from thumb cache # Remove from thumb from thumb cache
thumb_path = path.rpartition('.')[0] + '.jpg' thumb_path = path.rpartition('.')[0] + '.jpg'
@ -964,7 +982,9 @@ class ITUNES(DriverBase):
else: else:
if DEBUG: if DEBUG:
logger().error(" unable to find '%s' by '%s' (%s)" % logger().error(" unable to find '%s' by '%s' (%s)" %
(bl_book.title, bl_book.author,bl_book.uuid)) (self.cached_books[path]['title'],
self.cached_books[path]['author'],
self.cached_books[path]['uuid']))
if False: if False:
self._dump_booklist(booklists[0], indent=2) self._dump_booklist(booklists[0], indent=2)
@ -982,7 +1002,7 @@ class ITUNES(DriverBase):
:detected_device: Device information from the device scanner :detected_device: Device information from the device scanner
""" """
if DEBUG: if DEBUG:
logger().info("ITUNES.reset()") logger().info("%s.reset()" % self.__class__.__name__)
if report_progress: if report_progress:
self.set_progress_reporter(report_progress) self.set_progress_reporter(report_progress)
@ -994,7 +1014,7 @@ class ITUNES(DriverBase):
task does not have any progress information task does not have any progress information
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES.set_progress_reporter()") logger().info("%s.set_progress_reporter()" % self.__class__.__name__)
self.report_progress = report_progress self.report_progress = report_progress
@ -1002,7 +1022,7 @@ class ITUNES(DriverBase):
# This method is called with the plugboard that matches the format # This method is called with the plugboard that matches the format
# declared in use_plugboard_ext and a device name of ITUNES # declared in use_plugboard_ext and a device name of ITUNES
if DEBUG: if DEBUG:
logger().info("ITUNES.set_plugboard()") logger().info("%s.set_plugboard()" % self.__class__.__name__)
#logger().info(' plugboard: %s' % plugboards) #logger().info(' plugboard: %s' % plugboards)
self.plugboards = plugboards self.plugboards = plugboards
self.plugboard_func = pb_func self.plugboard_func = pb_func
@ -1016,7 +1036,7 @@ class ITUNES(DriverBase):
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES.sync_booklists()") logger().info("%s.sync_booklists()" % self.__class__.__name__)
if self.update_needed: if self.update_needed:
if DEBUG: if DEBUG:
@ -1043,7 +1063,7 @@ class ITUNES(DriverBase):
particular device doesn't have any of these locations it should return 0. particular device doesn't have any of these locations it should return 0.
""" """
if DEBUG: if DEBUG:
logger().info("ITUNES:total_space()") logger().info("%s.total_space()" % self.__class__.__name__)
capacity = 0 capacity = 0
if isosx: if isosx:
if 'iPod' in self.sources: if 'iPod' in self.sources:
@ -1081,7 +1101,7 @@ class ITUNES(DriverBase):
"Click 'Show Details' for a list.") "Click 'Show Details' for a list.")
if DEBUG: if DEBUG:
logger().info("ITUNES.upload_books()") logger().info("%s.upload_books()" % self.__class__.__name__)
if isosx: if isosx:
for (i, fpath) in enumerate(files): for (i, fpath) in enumerate(files):
@ -1098,7 +1118,7 @@ class ITUNES(DriverBase):
# Add new_book to self.cached_books # Add new_book to self.cached_books
if DEBUG: if DEBUG:
logger().info("ITUNES.upload_books()") logger().info("%s.upload_books()" % self.__class__.__name__)
logger().info(" adding '%s' by '%s' uuid:%s to self.cached_books" % logger().info(" adding '%s' by '%s' uuid:%s to self.cached_books" %
(metadata[i].title, (metadata[i].title,
authors_to_string(metadata[i].authors), authors_to_string(metadata[i].authors),
@ -1144,7 +1164,7 @@ class ITUNES(DriverBase):
# Add new_book to self.cached_books # Add new_book to self.cached_books
if DEBUG: if DEBUG:
logger().info("ITUNES.upload_books()") logger().info("%s.upload_books()" % self.__class__.__name__)
logger().info(" adding '%s' by '%s' uuid:%s to self.cached_books" % logger().info(" adding '%s' by '%s' uuid:%s to self.cached_books" %
(metadata[i].title, (metadata[i].title,
authors_to_string(metadata[i].authors), authors_to_string(metadata[i].authors),
@ -1182,7 +1202,7 @@ class ITUNES(DriverBase):
''' '''
assumes pythoncom wrapper for windows assumes pythoncom wrapper for windows
''' '''
logger().info(" ITUNES._add_device_book()") logger().info(" %s._add_device_book()" % self.__class__.__name__)
if isosx: if isosx:
import appscript import appscript
if 'iPod' in self.sources: if 'iPod' in self.sources:
@ -1292,7 +1312,7 @@ class ITUNES(DriverBase):
windows assumes pythoncom wrapper windows assumes pythoncom wrapper
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._add_library_book()") logger().info(" %s._add_library_book()" % self.__class__.__name__)
if isosx: if isosx:
import appscript import appscript
added = self.iTunes.add(appscript.mactypes.File(file)) added = self.iTunes.add(appscript.mactypes.File(file))
@ -1360,7 +1380,7 @@ class ITUNES(DriverBase):
fp = cached_book['lib_book'].Location fp = cached_book['lib_book'].Location
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._add_new_copy()") logger().info(" %s._add_new_copy()" % self.__class__.__name__)
if fpath.rpartition('.')[2].lower() == 'epub': if fpath.rpartition('.')[2].lower() == 'epub':
self._update_epub_metadata(fpath, metadata) self._update_epub_metadata(fpath, metadata)
@ -1399,7 +1419,7 @@ class ITUNES(DriverBase):
from PIL import Image as PILImage from PIL import Image as PILImage
if DEBUG: if DEBUG:
logger().info(" ITUNES._cover_to_thumb()") logger().info(" %s._cover_to_thumb()" % self.__class__.__name__)
thumb = None thumb = None
if metadata.cover: if metadata.cover:
@ -1526,7 +1546,7 @@ class ITUNES(DriverBase):
''' '''
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._create_new_book()") logger().info(" %s._create_new_book()" % self.__class__.__name__)
this_book = Book(metadata.title, authors_to_string(metadata.authors)) this_book = Book(metadata.title, authors_to_string(metadata.authors))
this_book.datetime = time.gmtime() this_book.datetime = time.gmtime()
@ -1575,7 +1595,7 @@ class ITUNES(DriverBase):
wait is passed when launching iTunes, as it seems to need a moment to come to its senses wait is passed when launching iTunes, as it seems to need a moment to come to its senses
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._discover_manual_sync_mode()") logger().info(" %s._discover_manual_sync_mode()" % self.__class__.__name__)
if wait: if wait:
time.sleep(wait) time.sleep(wait)
if isosx: if isosx:
@ -1593,7 +1613,7 @@ class ITUNES(DriverBase):
if dev_books is not None and len(dev_books): if dev_books is not None and len(dev_books):
first_book = dev_books[0] first_book = dev_books[0]
if False: if False:
logger().info(" determing manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist())) logger().info(" determining manual mode by modifying '%s' by %s" % (first_book.name(), first_book.artist()))
try: try:
first_book.bpm.set(0) first_book.bpm.set(0)
self.manual_sync_mode = True self.manual_sync_mode = True
@ -1655,8 +1675,8 @@ class ITUNES(DriverBase):
for book in booklist: for book in booklist:
if isosx: if isosx:
logger().info("%s%-40.40s %-30.30s %-10.10s %s" % logger().info("%s%-40.40s %-30.30s %-40.40s %-10.10s" %
(' '*indent,book.title, book.author, str(book.library_id)[-9:], book.uuid)) (' ' * indent, book.title, book.author, book.uuid, str(book.library_id)[-9:]))
elif iswindows: elif iswindows:
logger().info("%s%-40.40s %-30.30s" % logger().info("%s%-40.40s %-30.30s" %
(' ' * indent, book.title, book.author)) (' ' * indent, book.title, book.author))
@ -1705,13 +1725,14 @@ class ITUNES(DriverBase):
logger().info("%s%s" % (' ' * indent, '-' * len(msg))) logger().info("%s%s" % (' ' * indent, '-' * len(msg)))
if isosx: if isosx:
for cb in self.cached_books.keys(): for cb in self.cached_books.keys():
logger().info("%s%-40.40s %-30.30s %-10.10s %-10.10s %s" % logger().info("%s%-40.40s %-30.30s %-40.40s %-10.10s %-10.10s" %
(' ' * indent, (' ' * indent,
self.cached_books[cb]['title'], self.cached_books[cb]['title'],
self.cached_books[cb]['author'], self.cached_books[cb]['author'],
self.cached_books[cb]['uuid'],
str(self.cached_books[cb]['lib_book'])[-9:], str(self.cached_books[cb]['lib_book'])[-9:],
str(self.cached_books[cb]['dev_book'])[-9:], str(self.cached_books[cb]['dev_book'])[-9:],
self.cached_books[cb]['uuid'])) ))
elif iswindows: elif iswindows:
for cb in self.cached_books.keys(): for cb in self.cached_books.keys():
logger().info("%s%-40.40s %-30.30s %-4.4s %s" % logger().info("%s%-40.40s %-30.30s %-4.4s %s" %
@ -1728,7 +1749,7 @@ class ITUNES(DriverBase):
''' '''
from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.ebooks.BeautifulSoup import BeautifulSoup
logger().info(" ITUNES.__get_epub_metadata()") logger().info(" %s.__get_epub_metadata()" % self.__class__.__name__)
title = None title = None
author = None author = None
timestamp = None timestamp = None
@ -1760,7 +1781,8 @@ class ITUNES(DriverBase):
''' '''
''' '''
FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)]) FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)])
N=0; result='' N = 0
result = ''
while src: while src:
s, src = src[:length], src[length:] s, src = src[:length], src[length:]
hexa = ' '.join(["%02X" % ord(x) for x in s]) hexa = ' '.join(["%02X" % ord(x) for x in s])
@ -1806,7 +1828,7 @@ class ITUNES(DriverBase):
if iswindows: if iswindows:
dev_books = self._get_device_books_playlist() dev_books = self._get_device_books_playlist()
if DEBUG: if DEBUG:
logger().info(" ITUNES._find_device_book()") logger().info(" %s._find_device_book()" % self.__class__.__name__)
logger().info(" searching for '%s' by '%s' (%s)" % logger().info(" searching for '%s' by '%s' (%s)" %
(search['title'], search['author'], search['uuid'])) (search['title'], search['author'], search['uuid']))
attempts = 9 attempts = 9
@ -1876,7 +1898,7 @@ class ITUNES(DriverBase):
''' '''
if iswindows: if iswindows:
if DEBUG: if DEBUG:
logger().info(" ITUNES._find_library_book()") logger().info(" %s._find_library_book()" % self.__class__.__name__)
''' '''
if 'uuid' in search: if 'uuid' in search:
logger().info(" looking for '%s' by %s (%s)" % logger().info(" looking for '%s' by %s (%s)" %
@ -1909,7 +1931,6 @@ class ITUNES(DriverBase):
if DEBUG: if DEBUG:
logger().error(" no Books playlist found") logger().error(" no Books playlist found")
attempts = 9 attempts = 9
while attempts: while attempts:
# Find book whose Album field = search['uuid'] # Find book whose Album field = search['uuid']
@ -1996,7 +2017,8 @@ class ITUNES(DriverBase):
thumb_data = zfr.read(thumb_path) thumb_data = zfr.read(thumb_path)
if thumb_data == 'None': if thumb_data == 'None':
if False: if False:
logger().info(" ITUNES._generate_thumbnail()\n returning None from cover cache for '%s'" % title) logger().info(" %s._generate_thumbnail()\n returning None from cover cache for '%s'" %
(self.__class__.__name__, title))
zfr.close() zfr.close()
return None return None
except: except:
@ -2007,7 +2029,7 @@ class ITUNES(DriverBase):
return thumb_data return thumb_data
if DEBUG: if DEBUG:
logger().info(" ITUNES._generate_thumbnail('%s'):" % title) logger().info(" %s._generate_thumbnail('%s'):" % (self.__class__.__name__, title))
if isosx: if isosx:
# Fetch the artwork from iTunes # Fetch the artwork from iTunes
@ -2049,7 +2071,6 @@ class ITUNES(DriverBase):
return thumb_data return thumb_data
elif iswindows: elif iswindows:
if not book.Artwork.Count: if not book.Artwork.Count:
if DEBUG: if DEBUG:
@ -2101,7 +2122,7 @@ class ITUNES(DriverBase):
for file in myZipList: for file in myZipList:
exploded_file_size += file.file_size exploded_file_size += file.file_size
if False: if False:
logger().info(" ITUNES._get_device_book_size()") logger().info(" %s._get_device_book_size()" % self.__class__.__name__)
logger().info(" %d items in archive" % len(myZipList)) logger().info(" %d items in archive" % len(myZipList))
logger().info(" compressed: %d exploded: %d" % (compressed_size, exploded_file_size)) logger().info(" compressed: %d exploded: %d" % (compressed_size, exploded_file_size))
myZip.close() myZip.close()
@ -2112,7 +2133,7 @@ class ITUNES(DriverBase):
Assumes pythoncom wrapper for Windows Assumes pythoncom wrapper for Windows
''' '''
if DEBUG: if DEBUG:
logger().info("\n ITUNES._get_device_books()") logger().info("\n %s._get_device_books()" % self.__class__.__name__)
device_books = [] device_books = []
if isosx: if isosx:
@ -2131,14 +2152,13 @@ class ITUNES(DriverBase):
logger().error(" book_playlist not found") logger().error(" book_playlist not found")
for book in dev_books: for book in dev_books:
# This may need additional entries for international iTunes users
if book.kind() in self.Audiobooks: if book.kind() in self.Audiobooks:
if DEBUG: if DEBUG:
logger().info(" ignoring '%s' of type '%s'" % (book.name(), book.kind())) logger().info(" ignoring '%s' of type '%s'" % (book.name(), book.kind()))
else: else:
if DEBUG: if DEBUG:
logger().info(" %-30.30s %-30.30s %-40.40s [%s]" % logger().info(" %-40.40s %-30.30s %-40.40s [%s]" %
(book.name(), book.artist(), book.album(), book.kind())) (book.name(), book.artist(), book.composer(), book.kind()))
device_books.append(book) device_books.append(book)
if DEBUG: if DEBUG:
logger().info() logger().info()
@ -2165,13 +2185,12 @@ class ITUNES(DriverBase):
logger().info(" no Books playlist found") logger().info(" no Books playlist found")
for book in dev_books: for book in dev_books:
# This may need additional entries for international iTunes users
if book.KindAsString in self.Audiobooks: if book.KindAsString in self.Audiobooks:
if DEBUG: if DEBUG:
logger().info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString)) logger().info(" ignoring '%s' of type '%s'" % (book.Name, book.KindAsString))
else: else:
if DEBUG: if DEBUG:
logger().info(" %-30.30s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Album, book.KindAsString)) logger().info(" %-40.40s %-30.30s %-40.40s [%s]" % (book.Name, book.Artist, book.Composer, book.KindAsString))
device_books.append(book) device_books.append(book)
if DEBUG: if DEBUG:
logger().info() logger().info()
@ -2206,7 +2225,7 @@ class ITUNES(DriverBase):
Windows assumes pythoncom wrapper Windows assumes pythoncom wrapper
''' '''
if DEBUG: if DEBUG:
logger().info("\n ITUNES._get_library_books()") logger().info("\n %s._get_library_books()" % self.__class__.__name__)
library_books = {} library_books = {}
library_orphans = {} library_orphans = {}
@ -2381,7 +2400,7 @@ class ITUNES(DriverBase):
''' '''
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES:_launch_iTunes():\n Instantiating iTunes") logger().info(" %s._launch_iTunes():\n Instantiating iTunes" % self.__class__.__name__)
if isosx: if isosx:
import appscript import appscript
@ -2394,12 +2413,13 @@ class ITUNES(DriverBase):
running_apps = appscript.app('System Events') running_apps = appscript.app('System Events')
if not 'iTunes' in running_apps.processes.name(): if not 'iTunes' in running_apps.processes.name():
if DEBUG: if DEBUG:
logger().info( "ITUNES:_launch_iTunes(): Launching iTunes" ) logger().info("%s:_launch_iTunes(): Launching iTunes" % self.__class__.__name__)
try: try:
self.iTunes = iTunes = appscript.app('iTunes', hide=True) self.iTunes = iTunes = appscript.app('iTunes', hide=True)
except: except:
self.iTunes = None self.iTunes = None
raise UserFeedback(' ITUNES._launch_iTunes(): unable to find installed iTunes', details=None, level=UserFeedback.WARN) raise UserFeedback(' %s._launch_iTunes(): unable to find installed iTunes' %
self.__class__.__name__, details=None, level=UserFeedback.WARN)
iTunes.run() iTunes.run()
self.initial_status = 'launched' self.initial_status = 'launched'
@ -2444,10 +2464,10 @@ class ITUNES(DriverBase):
if DEBUG: if DEBUG:
logger().info(" %s %s" % (__appname__, __version__)) logger().info(" %s %s" % (__appname__, __version__))
logger().info(" [OSX %s, %s %s (%s), driver version %d.%d.%d]" % logger().info(" [OSX %s, %s %s (%s), %s driver version %d.%d.%d]" %
(platform.mac_ver()[0], (platform.mac_ver()[0],
self.iTunes.name(), self.iTunes.version(), self.initial_status, self.iTunes.name(), self.iTunes.version(), self.initial_status,
self.version[0],self.version[1],self.version[2])) self.__class__.__name__, self.version[0], self.version[1], self.version[2]))
logger().info(" communicating with iTunes via %s %s using %s binding" % (as_name, as_version, as_binding)) logger().info(" communicating with iTunes via %s %s using %s binding" % (as_name, as_version, as_binding))
logger().info(" calibre_library_path: %s" % self.calibre_library_path) logger().info(" calibre_library_path: %s" % self.calibre_library_path)
@ -2474,7 +2494,8 @@ class ITUNES(DriverBase):
self.iTunes = win32com.client.Dispatch("iTunes.Application") self.iTunes = win32com.client.Dispatch("iTunes.Application")
except: except:
self.iTunes = None self.iTunes = None
raise UserFeedback(' ITUNES._launch_iTunes(): unable to find installed iTunes', details=None, level=UserFeedback.WARN) raise UserFeedback(' %s._launch_iTunes(): unable to find installed iTunes'
% self.__class__.__name__, details=None, level=UserFeedback.WARN)
if not DEBUG: if not DEBUG:
self.iTunes.Windows[0].Minimized = True self.iTunes.Windows[0].Minimized = True
@ -2524,8 +2545,11 @@ class ITUNES(DriverBase):
Remove any iTunes orphans originally added by calibre Remove any iTunes orphans originally added by calibre
This occurs when the user deletes a book in iBooks while disconnected This occurs when the user deletes a book in iBooks while disconnected
''' '''
PURGE_ORPHANS = False
if PURGE_ORPHANS:
if DEBUG: if DEBUG:
logger().info(" ITUNES._purge_orphans()") logger().info(" %s._purge_orphans()" % self.__class__.__name__)
#self._dump_library_books(library_books) #self._dump_library_books(library_books)
#logger().info(" cached_books:\n %s" % "\n ".join(cached_books.keys())) #logger().info(" cached_books:\n %s" % "\n ".join(cached_books.keys()))
@ -2535,7 +2559,8 @@ class ITUNES(DriverBase):
str(library_books[book].description()).startswith(self.description_prefix): str(library_books[book].description()).startswith(self.description_prefix):
if DEBUG: if DEBUG:
logger().info(" '%s' not found on iDevice, removing from iTunes" % book) logger().info(" '%s' not found on iDevice, removing from iTunes" % book)
btr = { 'title':library_books[book].name(), btr = {
'title': library_books[book].name(),
'author': library_books[book].artist(), 'author': library_books[book].artist(),
'lib_book': library_books[book]} 'lib_book': library_books[book]}
self._remove_from_iTunes(btr) self._remove_from_iTunes(btr)
@ -2544,34 +2569,30 @@ class ITUNES(DriverBase):
library_books[book].Description.startswith(self.description_prefix): library_books[book].Description.startswith(self.description_prefix):
if DEBUG: if DEBUG:
logger().info(" '%s' not found on iDevice, removing from iTunes" % book) logger().info(" '%s' not found on iDevice, removing from iTunes" % book)
btr = { 'title':library_books[book].Name, btr = {
'title': library_books[book].Name,
'author': library_books[book].Artist, 'author': library_books[book].Artist,
'lib_book': library_books[book]} 'lib_book': library_books[book]}
self._remove_from_iTunes(btr) self._remove_from_iTunes(btr)
else:
if DEBUG: if DEBUG:
logger().info() logger().info(" %s._purge_orphans(disabled)" % self.__class__.__name__)
def _remove_existing_copy(self, path, metadata): def _remove_existing_copy(self, path, metadata):
''' '''
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._remove_existing_copy()") logger().info(" %s._remove_existing_copy()" % self.__class__.__name__)
if self.manual_sync_mode: if self.manual_sync_mode:
# Delete existing from Device|Books, add to self.update_list # Delete existing from Device|Books, add to self.update_list
# for deletion from booklist[0] during add_books_to_metadata # for deletion from booklist[0] during add_books_to_metadata
for book in self.cached_books: for book in self.cached_books:
if self.cached_books[book]['uuid'] == metadata.uuid or \ if (self.cached_books[book]['uuid'] == metadata.uuid or
(self.cached_books[book]['title'] == metadata.title and \ (self.cached_books[book]['title'] == metadata.title and
self.cached_books[book]['author'] == authors_to_string(metadata.authors)): self.cached_books[book]['author'] == metadata.author)):
self.update_list.append(self.cached_books[book]) self.update_list.append(self.cached_books[book])
if DEBUG:
logger().info( " deleting device book '%s'" % (metadata.title))
self._remove_from_device(self.cached_books[book]) self._remove_from_device(self.cached_books[book])
if DEBUG:
logger().info(" deleting library book '%s'" % metadata.title)
self._remove_from_iTunes(self.cached_books[book]) self._remove_from_iTunes(self.cached_books[book])
break break
else: else:
@ -2581,9 +2602,9 @@ class ITUNES(DriverBase):
# Delete existing from Library|Books, add to self.update_list # Delete existing from Library|Books, add to self.update_list
# for deletion from booklist[0] during add_books_to_metadata # for deletion from booklist[0] during add_books_to_metadata
for book in self.cached_books: for book in self.cached_books:
if self.cached_books[book]['uuid'] == metadata.uuid or \ if (self.cached_books[book]['uuid'] == metadata.uuid or
(self.cached_books[book]['title'] == metadata.title and \ (self.cached_books[book]['title'] == metadata.title and \
self.cached_books[book]['author'] == authors_to_string(metadata.authors)): self.cached_books[book]['author'] == metadata.author)):
self.update_list.append(self.cached_books[book]) self.update_list.append(self.cached_books[book])
if DEBUG: if DEBUG:
logger().info(" deleting library book '%s'" % metadata.title) logger().info(" deleting library book '%s'" % metadata.title)
@ -2598,7 +2619,7 @@ class ITUNES(DriverBase):
Windows assumes pythoncom wrapper Windows assumes pythoncom wrapper
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._remove_from_device()") logger().info(" %s._remove_from_device()" % self.__class__.__name__)
if isosx: if isosx:
if DEBUG: if DEBUG:
logger().info(" deleting '%s' from iDevice" % cached_book['title']) logger().info(" deleting '%s' from iDevice" % cached_book['title'])
@ -2622,7 +2643,7 @@ class ITUNES(DriverBase):
iTunes does not delete books from storage when removing from database via automation iTunes does not delete books from storage when removing from database via automation
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._remove_from_iTunes():") logger().info(" %s._remove_from_iTunes():" % self.__class__.__name__)
if isosx: if isosx:
''' Manually remove the book from iTunes storage ''' ''' Manually remove the book from iTunes storage '''
@ -2664,7 +2685,8 @@ class ITUNES(DriverBase):
except: except:
# We get here if there was an error with .location().path # We get here if there was an error with .location().path
if DEBUG: if DEBUG:
logger().info(" '%s' not found in iTunes storage" % cached_book['title']) logger().info(" '%s' by %s not found in iTunes storage" %
(cached_book['title'], cached_book['author']))
# Delete the book from the iTunes database # Delete the book from the iTunes database
try: try:
@ -2739,7 +2761,7 @@ class ITUNES(DriverBase):
from lxml import etree from lxml import etree
if DEBUG: if DEBUG:
logger().info(" ITUNES._update_epub_metadata()") logger().info(" %s._update_epub_metadata()" % self.__class__.__name__)
# Fetch plugboard updates # Fetch plugboard updates
metadata_x = self._xform_metadata_via_plugboard(metadata, 'epub') metadata_x = self._xform_metadata_via_plugboard(metadata, 'epub')
@ -2807,7 +2829,7 @@ class ITUNES(DriverBase):
Trigger a sync, wait for completion Trigger a sync, wait for completion
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES:_update_device():\n %s" % msg) logger().info(" %s:_update_device():\n %s" % (self.__class__.__name__, msg))
if isosx: if isosx:
self.iTunes.update() self.iTunes.update()
@ -2855,7 +2877,7 @@ class ITUNES(DriverBase):
''' '''
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._update_iTunes_metadata()") logger().info(" %s._update_iTunes_metadata()" % self.__class__.__name__)
STRIP_TAGS = re.compile(r'<[^<]*?/?>') STRIP_TAGS = re.compile(r'<[^<]*?/?>')
@ -2907,7 +2929,7 @@ class ITUNES(DriverBase):
# If title_sort applied in plugboard, that overrides using series/index as title_sort # If title_sort applied in plugboard, that overrides using series/index as title_sort
if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]: if metadata_x.series and self.settings().extra_customization[self.USE_SERIES_AS_CATEGORY]:
if DEBUG: if DEBUG:
logger().info(" ITUNES._update_iTunes_metadata()") logger().info(" %s._update_iTunes_metadata()" % self.__class__.__name__)
logger().info(" using Series name '%s' as Genre" % metadata_x.series) logger().info(" using Series name '%s' as Genre" % metadata_x.series)
# Format the index as a sort key # Format the index as a sort key
@ -2949,7 +2971,6 @@ class ITUNES(DriverBase):
db_added.genre.set(tag) db_added.genre.set(tag)
break break
elif metadata_x.tags is not None: elif metadata_x.tags is not None:
if DEBUG: if DEBUG:
logger().info(" %susing Tag as Genre" % logger().info(" %susing Tag as Genre" %
@ -3089,7 +3110,7 @@ class ITUNES(DriverBase):
Ensure iDevice metadata is writable. Direct connect mode only Ensure iDevice metadata is writable. Direct connect mode only
''' '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._wait_for_writable_metadata()") logger().info(" %s._wait_for_writable_metadata()" % self.__class__.__name__)
logger().warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE) logger().warning(" %s" % self.UNSUPPORTED_DIRECT_CONNECT_MODE_MESSAGE)
attempts = 9 attempts = 9
@ -3113,7 +3134,7 @@ class ITUNES(DriverBase):
def _xform_metadata_via_plugboard(self, book, format): def _xform_metadata_via_plugboard(self, book, format):
''' Transform book metadata from plugboard templates ''' ''' Transform book metadata from plugboard templates '''
if DEBUG: if DEBUG:
logger().info(" ITUNES._xform_metadata_via_plugboard()") logger().info(" %s._xform_metadata_via_plugboard()" % self.__class__.__name__)
if self.plugboard_func: if self.plugboard_func:
pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards) pb = self.plugboard_func(self.DEVICE_PLUGBOARD_NAME, format, self.plugboards)
@ -3143,6 +3164,7 @@ class ITUNES(DriverBase):
newmi = book newmi = book
return newmi return newmi
class ITUNES_ASYNC(ITUNES): class ITUNES_ASYNC(ITUNES):
''' '''
This subclass allows the user to interact directly with iTunes via a menu option This subclass allows the user to interact directly with iTunes via a menu option
@ -3160,7 +3182,7 @@ class ITUNES_ASYNC(ITUNES):
def __init__(self, path): def __init__(self, path):
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC:__init__()") logger().info("%s.__init__()" % self.__class__.__name__)
try: try:
import appscript import appscript
@ -3210,7 +3232,7 @@ class ITUNES_ASYNC(ITUNES):
""" """
if not oncard: if not oncard:
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC:books()") logger().info("%s.books()" % self.__class__.__name__)
if self.settings().extra_customization[self.CACHE_COVERS]: if self.settings().extra_customization[self.CACHE_COVERS]:
logger().info(" Cover fetching/caching enabled") logger().info(" Cover fetching/caching enabled")
else: else:
@ -3324,7 +3346,7 @@ class ITUNES_ASYNC(ITUNES):
are pending GUI jobs that need to communicate with the device. are pending GUI jobs that need to communicate with the device.
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC:eject()") logger().info("%s.eject()" % self.__class__.__name__)
self.iTunes = None self.iTunes = None
self.connected = False self.connected = False
@ -3339,7 +3361,7 @@ class ITUNES_ASYNC(ITUNES):
particular device doesn't have any of these locations it should return -1. particular device doesn't have any of these locations it should return -1.
""" """
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC:free_space()") logger().info("%s.free_space()" % self.__class__.__name__)
free_space = 0 free_space = 0
if isosx: if isosx:
s = os.statvfs(os.sep) s = os.statvfs(os.sep)
@ -3356,7 +3378,7 @@ class ITUNES_ASYNC(ITUNES):
@return: (device name, device version, software version on device, mime type) @return: (device name, device version, software version on device, mime type)
""" """
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC:get_device_information()") logger().info("%s.get_device_information()" % self.__class__.__name__)
return ('iTunes', 'hw v1.0', 'sw v1.0', 'mime type normally goes here') return ('iTunes', 'hw v1.0', 'sw v1.0', 'mime type normally goes here')
@ -3382,7 +3404,8 @@ class ITUNES_ASYNC(ITUNES):
raise OpenFeedback(self.ITUNES_SANDBOX_LOCKOUT_MESSAGE) raise OpenFeedback(self.ITUNES_SANDBOX_LOCKOUT_MESSAGE)
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC.open(connected_device: %s)" % repr(connected_device)) logger().info("%s.open(connected_device: %s)" %
(self.__class__.__name__, repr(connected_device)))
# Confirm/create thumbs archive # Confirm/create thumbs archive
if not os.path.exists(self.cache_dir): if not os.path.exists(self.cache_dir):
@ -3419,7 +3442,7 @@ class ITUNES_ASYNC(ITUNES):
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC.sync_booklists()") logger().info("%s.sync_booklists()" % self.__class__.__name__)
# Inform user of any problem books # Inform user of any problem books
if self.problem_titles: if self.problem_titles:
@ -3433,9 +3456,10 @@ class ITUNES_ASYNC(ITUNES):
''' '''
''' '''
if DEBUG: if DEBUG:
logger().info("ITUNES_ASYNC:unmount_device()") logger().info("%s.unmount_device()" % self.__class__.__name__)
self.connected = False self.connected = False
class BookList(list): class BookList(list):
''' '''
A list of books. Each Book object must have the fields: A list of books. Each Book object must have the fields:
@ -3488,6 +3512,7 @@ class BookList(list):
''' '''
return {} return {}
class Book(Metadata): class Book(Metadata):
''' '''
A simple class describing a book in the iTunes Books Library. A simple class describing a book in the iTunes Books Library.
@ -3495,9 +3520,9 @@ class Book(Metadata):
''' '''
def __init__(self, title, author): def __init__(self, title, author):
Metadata.__init__(self, title, authors=author.split(' & ')) Metadata.__init__(self, title, authors=author.split(' & '))
self.author = author
self.author_sort = author_to_author_sort(author) self.author_sort = author_to_author_sort(author)
@property @property
def title_sorter(self): def title_sorter(self):
return title_sort(self.title) return title_sort(self.title)

View File

@ -2357,6 +2357,8 @@ class KOBOTOUCH(KOBO):
update_query = 'UPDATE content SET Series=?, SeriesNumber==? where BookID is Null and ContentID = ?' update_query = 'UPDATE content SET Series=?, SeriesNumber==? where BookID is Null and ContentID = ?'
if book.series is None: if book.series is None:
update_values = (None, None, book.contentID, ) update_values = (None, None, book.contentID, )
elif book.series_index is None: # This should never happen, but...
update_values = (book.series, None, book.contentID, )
else: else:
update_values = (book.series, "%g"%book.series_index, book.contentID, ) update_values = (book.series, "%g"%book.series_index, book.contentID, )

View File

@ -16,6 +16,7 @@ const calibre_device_entry_t calibre_mtp_device_table[] = {
// Nexus 10 // Nexus 10
, { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS} , { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS}
, { "Google", 0x18d1, "Nexus 10", 0x4ee1, DEVICE_FLAGS_ANDROID_BUGS}
, { NULL, 0xffff, NULL, 0xffff, DEVICE_FLAG_NONE } , { NULL, 0xffff, NULL, 0xffff, DEVICE_FLAG_NONE }
}; };

View File

@ -74,11 +74,12 @@ def remove_kindlegen_markup(parts):
part = "".join(srcpieces) part = "".join(srcpieces)
parts[i] = part parts[i] = part
# we can safely remove all of the Kindlegen generated data-AmznPageBreak tags # we can safely remove all of the Kindlegen generated data-AmznPageBreak
# attributes
find_tag_with_AmznPageBreak_pattern = re.compile( find_tag_with_AmznPageBreak_pattern = re.compile(
r'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE) r'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
within_tag_AmznPageBreak_position_pattern = re.compile( within_tag_AmznPageBreak_position_pattern = re.compile(
r'''\sdata-AmznPageBreak=['"][^'"]*['"]''') r'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')
for i in xrange(len(parts)): for i in xrange(len(parts)):
part = parts[i] part = parts[i]
@ -86,10 +87,8 @@ def remove_kindlegen_markup(parts):
for j in range(len(srcpieces)): for j in range(len(srcpieces)):
tag = srcpieces[j] tag = srcpieces[j]
if tag.startswith('<'): if tag.startswith('<'):
for m in within_tag_AmznPageBreak_position_pattern.finditer(tag): srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub(
replacement = '' lambda m:' style="page-break-after:%s"'%m.group(1), tag)
tag = within_tag_AmznPageBreak_position_pattern.sub(replacement, tag, 1)
srcpieces[j] = tag
part = "".join(srcpieces) part = "".join(srcpieces)
parts[i] = part parts[i] = part
@ -203,7 +202,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
# All flows are now unicode and have links resolved # All flows are now unicode and have links resolved
return flows return flows
def insert_flows_into_markup(parts, flows, mobi8_reader): def insert_flows_into_markup(parts, flows, mobi8_reader, log):
mr = mobi8_reader mr = mobi8_reader
# kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc) # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
@ -219,7 +218,12 @@ def insert_flows_into_markup(parts, flows, mobi8_reader):
if tag.startswith('<'): if tag.startswith('<'):
for m in flow_pattern.finditer(tag): for m in flow_pattern.finditer(tag):
num = int(m.group(1), 32) num = int(m.group(1), 32)
try:
fi = mr.flowinfo[num] fi = mr.flowinfo[num]
except IndexError:
log.warn('Ignoring invalid flow reference: %s'%m.group())
tag = ''
else:
if fi.format == 'inline': if fi.format == 'inline':
tag = flows[num] tag = flows[num]
else: else:
@ -313,7 +317,7 @@ def expand_mobi8_markup(mobi8_reader, resource_map, log):
flows = update_flow_links(mobi8_reader, resource_map, log) flows = update_flow_links(mobi8_reader, resource_map, log)
# Insert inline flows into the markup # Insert inline flows into the markup
insert_flows_into_markup(parts, flows, mobi8_reader) insert_flows_into_markup(parts, flows, mobi8_reader, log)
# Insert raster images into markup # Insert raster images into markup
insert_images_into_markup(parts, resource_map, log) insert_images_into_markup(parts, resource_map, log)

View File

@ -44,6 +44,18 @@ def locate_beg_end_of_tag(ml, aid):
return plt, pgt return plt, pgt
return 0, 0 return 0, 0
def reverse_tag_iter(block):
    ''' Yield every tag in block, walking backwards from the last tag to
    the first one.

    A "tag" is the slice running from a ``<`` up to and including the
    nearest following ``>`` that has not been consumed yet. Scanning stops
    as soon as either delimiter can no longer be found in the part of
    block that is still unread.

    :param block: byte string containing markup
    :return: generator of byte-string slices of block
    '''
    # Position of the '>' that closes the tag currently being extracted
    close_at = block.rfind(b'>', 0, len(block))
    while close_at != -1:
        open_at = block.rfind(b'<', 0, close_at)
        if open_at == -1:
            # A closing bracket with no opener before it: nothing left
            return
        yield block[open_at:close_at + 1]
        # Continue the search strictly before the tag just produced
        close_at = block.rfind(b'>', 0, open_at)
class Mobi8Reader(object): class Mobi8Reader(object):
def __init__(self, mobi6_reader, log): def __init__(self, mobi6_reader, log):
@ -275,13 +287,12 @@ class Mobi8Reader(object):
return '%s/%s'%(fi.type, fi.filename), idtext return '%s/%s'%(fi.type, fi.filename), idtext
def get_id_tag(self, pos): def get_id_tag(self, pos):
# find the correct tag by actually searching in the destination # Find the first tag with a named anchor (name or id attribute) before
# textblock at position # pos
fi = self.get_file_info(pos) fi = self.get_file_info(pos)
if fi.num is None and fi.start is None: if fi.num is None and fi.start is None:
raise ValueError('No file contains pos: %d'%pos) raise ValueError('No file contains pos: %d'%pos)
textblock = self.parts[fi.num] textblock = self.parts[fi.num]
id_map = []
npos = pos - fi.start npos = pos - fi.start
pgt = textblock.find(b'>', npos) pgt = textblock.find(b'>', npos)
plt = textblock.find(b'<', npos) plt = textblock.find(b'<', npos)
@ -290,28 +301,15 @@ class Mobi8Reader(object):
if plt == npos or pgt < plt: if plt == npos or pgt < plt:
npos = pgt + 1 npos = pgt + 1
textblock = textblock[0:npos] textblock = textblock[0:npos]
# find id links only inside of tags id_re = re.compile(br'''<[^>]+\sid\s*=\s*['"]([^'"]+)['"]''')
# inside any < > pair find all "id=' and return whatever is inside name_re = re.compile(br'''<\s*a\s*\sname\s*=\s*['"]([^'"]+)['"]''')
# the quotes for tag in reverse_tag_iter(textblock):
id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"][^>]*>''', m = id_re.match(tag) or name_re.match(tag)
re.IGNORECASE) if m is not None:
for m in re.finditer(id_pattern, textblock): return m.group(1)
id_map.append((m.start(), m.group(1)))
if not id_map: # No tag found, link to start of file
# Found no id in the textblock, link must be to top of file
return b'' return b''
# if npos is before first id= inside a tag, return the first
if npos < id_map[0][0]:
return id_map[0][1]
# if npos is after the last id= inside a tag, return the last
if npos > id_map[-1][0]:
return id_map[-1][1]
# otherwise find last id before npos
for i, item in enumerate(id_map):
if npos < item[0]:
return id_map[i-1][1]
return id_map[0][1]
def create_guide(self): def create_guide(self):
guide = Guide() guide = Guide()

View File

@ -320,13 +320,11 @@ class OEBReader(object):
self.logger.warn(u'Spine item %r not found' % idref) self.logger.warn(u'Spine item %r not found' % idref)
continue continue
item = manifest.ids[idref] item = manifest.ids[idref]
if item.media_type.lower() in OEB_DOCS and hasattr(item.data, 'xpath'):
spine.add(item, elem.get('linear')) spine.add(item, elem.get('linear'))
for item in spine: else:
if item.media_type.lower() not in OEB_DOCS:
if not hasattr(item.data, 'xpath'):
self.oeb.log.warn('The item %s is not a XML document.' self.oeb.log.warn('The item %s is not a XML document.'
' Removing it from spine.'%item.href) ' Removing it from spine.'%item.href)
spine.remove(item)
if len(spine) == 0: if len(spine) == 0:
raise OEBError("Spine is empty") raise OEBError("Spine is empty")
self._spine_add_extra() self._spine_add_extra()

View File

@ -114,7 +114,9 @@ class DetectStructure(object):
def find_matches(expr, doc): def find_matches(expr, doc):
try: try:
return XPath(expr)(doc) ans = XPath(expr)(doc)
len(ans)
return ans
except: except:
self.log.warn('Invalid chapter expression, ignoring: %s'%expr) self.log.warn('Invalid chapter expression, ignoring: %s'%expr)
return [] return []
@ -203,7 +205,9 @@ class DetectStructure(object):
def find_matches(expr, doc): def find_matches(expr, doc):
try: try:
return XPath(expr)(doc) ans = XPath(expr)(doc)
len(ans)
return ans
except: except:
self.log.warn('Invalid ToC expression, ignoring: %s'%expr) self.log.warn('Invalid ToC expression, ignoring: %s'%expr)
return [] return []

View File

@ -27,10 +27,10 @@ def get_custom_size(opts):
custom_size = None custom_size = None
if opts.custom_size != None: if opts.custom_size != None:
width, sep, height = opts.custom_size.partition('x') width, sep, height = opts.custom_size.partition('x')
if height != '': if height:
try: try:
width = int(width) width = float(width)
height = int(height) height = float(height)
custom_size = (width, height) custom_size = (width, height)
except: except:
custom_size = None custom_size = None

View File

@ -72,8 +72,8 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
mobi = details.xpath( mobi = details.xpath(
'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())') 'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')
cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src')) cover_url = ''.join(data.xpath('.//div[@class="coverimg"]/a/img/@src'))
price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip() price = ''.join(data.xpath('.//div[@class="preis"]/text()')).replace('*', '').strip()
counter -= 1 counter -= 1

View File

@ -8,7 +8,7 @@ from PyQt4.Qt import (QThread, pyqtSignal, Qt, QUrl, QDialog, QGridLayout,
import mechanize import mechanize
from calibre.constants import (__appname__, __version__, iswindows, isosx, from calibre.constants import (__appname__, __version__, iswindows, isosx,
isportable) isportable, is64bit)
from calibre import browser, prints, as_unicode from calibre import browser, prints, as_unicode
from calibre.utils.config import prefs from calibre.utils.config import prefs
from calibre.gui2 import config, dynamic, open_url from calibre.gui2 import config, dynamic, open_url
@ -19,6 +19,13 @@ URL = 'http://status.calibre-ebook.com/latest'
NO_CALIBRE_UPDATE = '-0.0.0' NO_CALIBRE_UPDATE = '-0.0.0'
VSEP = '|' VSEP = '|'
def get_download_url():
    ''' Return the calibre download page URL for the running build.

    The page is chosen from the platform flags in this order: portable
    build, Windows (with a separate page for 64 bit builds), OS X and
    finally linux as the fallback.
    '''
    if isportable:
        flavour = 'portable'
    elif iswindows:
        # Only the Windows installer has a dedicated 64 bit download page
        flavour = 'windows64' if is64bit else 'windows'
    elif isosx:
        flavour = 'osx'
    else:
        flavour = 'linux'
    return 'http://calibre-ebook.com/download_' + flavour
def get_newest_version(): def get_newest_version():
br = browser() br = browser()
req = mechanize.Request(URL) req = mechanize.Request(URL)
@ -116,10 +123,7 @@ class UpdateNotification(QDialog):
config.set('new_version_notification', bool(self.cb.isChecked())) config.set('new_version_notification', bool(self.cb.isChecked()))
def accept(self): def accept(self):
url = ('http://calibre-ebook.com/download_' + open_url(QUrl(get_download_url()))
('portable' if isportable else 'windows' if iswindows
else 'osx' if isosx else 'linux'))
open_url(QUrl(url))
QDialog.accept(self) QDialog.accept(self)

View File

@ -12,6 +12,7 @@ from calibre.customize import CatalogPlugin
from calibre.library.catalogs import FIELDS from calibre.library.catalogs import FIELDS
from calibre.customize.conversion import DummyReporter from calibre.customize.conversion import DummyReporter
class CSV_XML(CatalogPlugin): class CSV_XML(CatalogPlugin):
'CSV/XML catalog generator' 'CSV/XML catalog generator'
@ -227,4 +228,3 @@ class CSV_XML(CatalogPlugin):
with open(path_to_output, 'w') as f: with open(path_to_output, 'w') as f:
f.write(etree.tostring(root, encoding='utf-8', f.write(etree.tostring(root, encoding='utf-8',
xml_declaration=True, pretty_print=True)) xml_declaration=True, pretty_print=True))

View File

@ -21,6 +21,7 @@ from calibre.utils.localization import get_lang
Option = namedtuple('Option', 'option, default, dest, action, help') Option = namedtuple('Option', 'option, default, dest, action, help')
class EPUB_MOBI(CatalogPlugin): class EPUB_MOBI(CatalogPlugin):
'ePub catalog generator' 'ePub catalog generator'
@ -386,6 +387,8 @@ class EPUB_MOBI(CatalogPlugin):
if opts.fmt == 'mobi': if opts.fmt == 'mobi':
recommendations.append(('no_inline_toc', True, recommendations.append(('no_inline_toc', True,
OptionRecommendation.HIGH)) OptionRecommendation.HIGH))
recommendations.append(('verbose', 2,
OptionRecommendation.HIGH))
# Use existing cover or generate new cover # Use existing cover or generate new cover
cpath = None cpath = None
@ -442,4 +445,3 @@ class EPUB_MOBI(CatalogPlugin):
# returns to gui2.actions.catalog:catalog_generated() # returns to gui2.actions.catalog:catalog_generated()
return catalog.error return catalog.error

View File

@ -25,6 +25,7 @@ from calibre.utils.icu import capitalize, collation_order, sort_key
from calibre.utils.magick.draw import thumbnail from calibre.utils.magick.draw import thumbnail
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class CatalogBuilder(object): class CatalogBuilder(object):
''' '''
Generates catalog source files from calibre database Generates catalog source files from calibre database
@ -98,7 +99,6 @@ class CatalogBuilder(object):
else: else:
return '&nbsp;' return '&nbsp;'
def __init__(self, db, _opts, plugin, def __init__(self, db, _opts, plugin,
report_progress=DummyReporter(), report_progress=DummyReporter(),
stylesheet="content/stylesheet.css", stylesheet="content/stylesheet.css",
@ -120,11 +120,13 @@ class CatalogBuilder(object):
_opts.output_profile and _opts.output_profile and
_opts.output_profile.startswith("kindle")) else False _opts.output_profile.startswith("kindle")) else False
self.all_series = set()
self.authors = None self.authors = None
self.bookmarked_books = None self.bookmarked_books = None
self.bookmarked_books_by_date_read = None self.bookmarked_books_by_date_read = None
self.books_by_author = None self.books_by_author = None
self.books_by_date_range = None self.books_by_date_range = None
self.books_by_description = []
self.books_by_month = None self.books_by_month = None
self.books_by_series = None self.books_by_series = None
self.books_by_title = None self.books_by_title = None
@ -139,6 +141,7 @@ class CatalogBuilder(object):
if self.opts.generate_genres else None if self.opts.generate_genres else None
self.html_filelist_1 = [] self.html_filelist_1 = []
self.html_filelist_2 = [] self.html_filelist_2 = []
self.individual_authors = None
self.merge_comments_rule = dict(zip(['field', 'position', 'hr'], self.merge_comments_rule = dict(zip(['field', 'position', 'hr'],
_opts.merge_comments_rule.split(':'))) _opts.merge_comments_rule.split(':')))
self.ncx_soup = None self.ncx_soup = None
@ -154,6 +157,7 @@ class CatalogBuilder(object):
self.total_steps = 6.0 self.total_steps = 6.0
self.use_series_prefix_in_titles_section = False self.use_series_prefix_in_titles_section = False
self.dump_custom_fields()
self.books_to_catalog = self.fetch_books_to_catalog() self.books_to_catalog = self.fetch_books_to_catalog()
self.compute_total_steps() self.compute_total_steps()
self.calculate_thumbnail_dimensions() self.calculate_thumbnail_dimensions()
@ -447,7 +451,7 @@ class CatalogBuilder(object):
hits.remove(amp) hits.remove(amp)
for hit in hits: for hit in hits:
name = hit[1:-1] name = hit[1:-1]
if htmlentitydefs.name2codepoint.has_key(name): if htmlentitydefs.name2codepoint in name:
s = s.replace(hit, unichr(htmlentitydefs.name2codepoint[name])) s = s.replace(hit, unichr(htmlentitydefs.name2codepoint[name]))
s = s.replace(amp, "&") s = s.replace(amp, "&")
return s return s
@ -586,7 +590,7 @@ class CatalogBuilder(object):
# Literal comparison for Tags field # Literal comparison for Tags field
if rule['field'].lower() == 'tags': if rule['field'].lower() == 'tags':
if rule['pattern'].lower() in map(unicode.lower, record['tags']): if rule['pattern'].lower() in map(unicode.lower, record['tags']):
if self.opts.verbose: if self.DEBUG and self.opts.verbose:
self.opts.log.info(" %s '%s' by %s (%s: Tags includes '%s')" % self.opts.log.info(" %s '%s' by %s (%s: Tags includes '%s')" %
(rule['prefix'], record['title'], (rule['prefix'], record['title'],
record['authors'][0], rule['name'], record['authors'][0], rule['name'],
@ -616,7 +620,7 @@ class CatalogBuilder(object):
try: try:
if re.search(rule['pattern'], unicode(field_contents), if re.search(rule['pattern'], unicode(field_contents),
re.IGNORECASE) is not None: re.IGNORECASE) is not None:
if self.opts.verbose: if self.DEBUG:
_log_prefix_rule_match_info(rule, record, field_contents) _log_prefix_rule_match_info(rule, record, field_contents)
return rule['prefix'] return rule['prefix']
except: except:
@ -624,12 +628,24 @@ class CatalogBuilder(object):
self.opts.log.error("pattern failed to compile: %s" % rule['pattern']) self.opts.log.error("pattern failed to compile: %s" % rule['pattern'])
pass pass
elif field_contents is None and rule['pattern'] == 'None': elif field_contents is None and rule['pattern'] == 'None':
if self.opts.verbose: if self.DEBUG:
_log_prefix_rule_match_info(rule, record, field_contents) _log_prefix_rule_match_info(rule, record, field_contents)
return rule['prefix'] return rule['prefix']
return None return None
def dump_custom_fields(self):
    """
    Dump custom field mappings for debugging.

    When self.opts.verbose is set, logs a " Custom fields:" header via
    self.opts.log.info, followed by one line per key returned by
    self.db.custom_field_keys(). Each line shows the key, the quoted
    'name' and the 'datatype' read from self.db.metadata_for_field(key).
    Nothing is logged when verbose is off.

    NOTE(review): block nesting was reconstructed from a capture that had
    lost its indentation; the whole dump is assumed to be guarded by the
    verbose flag -- confirm against the repository copy.
    """
    if not self.opts.verbose:
        return

    self.opts.log.info(" Custom fields:")
    for cf in self.db.custom_field_keys():
        # Fetch the field metadata once; both columns come from the same dict.
        field_md = self.db.metadata_for_field(cf)
        self.opts.log.info(" %-20s %-20s %s" %
                           (cf, "'%s'" % field_md['name'], field_md['datatype']))
def establish_equivalencies(self, item_list, key=None): def establish_equivalencies(self, item_list, key=None):
""" Return icu equivalent sort letter. """ Return icu equivalent sort letter.
@ -716,7 +732,8 @@ class CatalogBuilder(object):
Outputs: Outputs:
books_by_author: database, sorted by author books_by_author: database, sorted by author
authors: list of unique authors authors: list of book authors. Two credited authors are considered an
individual entity
error: author_sort mismatches error: author_sort mismatches
Return: Return:
@ -728,6 +745,13 @@ class CatalogBuilder(object):
books_by_author = list(self.books_to_catalog) books_by_author = list(self.books_to_catalog)
self.detect_author_sort_mismatches(books_by_author) self.detect_author_sort_mismatches(books_by_author)
# Assumes books_by_title already populated
# init books_by_description before relisting multiple authors
if self.opts.generate_descriptions:
books_by_description = list(books_by_author) if self.opts.sort_descriptions_by_author \
else list(self.books_by_title)
if self.opts.cross_reference_authors: if self.opts.cross_reference_authors:
books_by_author = self.relist_multiple_authors(books_by_author) books_by_author = self.relist_multiple_authors(books_by_author)
@ -737,6 +761,10 @@ class CatalogBuilder(object):
asl = [i['author_sort'] for i in books_by_author] asl = [i['author_sort'] for i in books_by_author]
las = max(asl, key=len) las = max(asl, key=len)
if self.opts.generate_descriptions:
self.books_by_description = sorted(books_by_description,
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
books_by_author = sorted(books_by_author, books_by_author = sorted(books_by_author,
key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las)))) key=lambda x: sort_key(self._kf_books_by_author_sorter_author_sort(x, len(las))))
@ -758,6 +786,7 @@ class CatalogBuilder(object):
current_author = authors[0] current_author = authors[0]
multiple_authors = False multiple_authors = False
unique_authors = [] unique_authors = []
individual_authors = set()
for (i, author) in enumerate(authors): for (i, author) in enumerate(authors):
if author != current_author: if author != current_author:
# Note that current_author and author are tuples: (friendly, sort) # Note that current_author and author are tuples: (friendly, sort)
@ -780,14 +809,23 @@ class CatalogBuilder(object):
unique_authors.append((current_author[0], icu_title(current_author[1]), unique_authors.append((current_author[0], icu_title(current_author[1]),
books_by_current_author)) books_by_current_author))
self.authors = list(unique_authors)
self.books_by_author = books_by_author
for ua in unique_authors:
for ia in ua[0].replace(' &amp; ', ' & ').split(' & '):
individual_authors.add(ia)
self.individual_authors = list(individual_authors)
if self.DEBUG and self.opts.verbose: if self.DEBUG and self.opts.verbose:
self.opts.log.info("\nfetch_books_by_author(): %d unique authors" % len(unique_authors)) self.opts.log.info("\nfetch_books_by_author(): %d unique authors" % len(unique_authors))
for author in unique_authors: for author in unique_authors:
self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20], self.opts.log.info((u" %-50s %-25s %2d" % (author[0][0:45], author[1][0:20],
author[2])).encode('utf-8')) author[2])).encode('utf-8'))
self.opts.log.info("\nfetch_books_by_author(): %d individual authors" % len(individual_authors))
for author in sorted(individual_authors):
self.opts.log.info("%s" % author)
self.authors = unique_authors
self.books_by_author = books_by_author
return True return True
def fetch_books_by_title(self): def fetch_books_by_title(self):
@ -869,6 +907,7 @@ class CatalogBuilder(object):
this_title['title'] = self.convert_html_entities(record['title']) this_title['title'] = self.convert_html_entities(record['title'])
if record['series']: if record['series']:
this_title['series'] = record['series'] this_title['series'] = record['series']
self.all_series.add(this_title['series'])
this_title['series_index'] = record['series_index'] this_title['series_index'] = record['series_index']
else: else:
this_title['series'] = None this_title['series'] = None
@ -1000,7 +1039,7 @@ class CatalogBuilder(object):
data = self.plugin.search_sort_db(self.db, self.opts) data = self.plugin.search_sort_db(self.db, self.opts)
data = self.process_exclusions(data) data = self.process_exclusions(data)
if self.opts.verbose and self.prefix_rules: if self.prefix_rules and self.DEBUG:
self.opts.log.info(" Added prefixes:") self.opts.log.info(" Added prefixes:")
# Populate this_title{} from data[{},{}] # Populate this_title{} from data[{},{}]
@ -1042,6 +1081,7 @@ class CatalogBuilder(object):
def initialize(self, save_template): def initialize(self, save_template):
self._save_template = save_template self._save_template = save_template
self.SUPPORTS_SUB_DIRS = True self.SUPPORTS_SUB_DIRS = True
def save_template(self): def save_template(self):
return self._save_template return self._save_template
@ -2070,7 +2110,6 @@ class CatalogBuilder(object):
len(genre[key]), len(genre[key]),
'titles' if len(genre[key]) > 1 else 'title')) 'titles' if len(genre[key]) > 1 else 'title'))
# Write the results # Write the results
# genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...] # genre_list = [ {friendly_tag:[{book},{book}]}, {friendly_tag:[{book},{book}]}, ...]
master_genre_list = [] master_genre_list = []
@ -2107,7 +2146,8 @@ class CatalogBuilder(object):
outfile) outfile)
tag_file = "content/Genre_%s.html" % genre tag_file = "content/Genre_%s.html" % genre
master_genre_list.append({'tag':genre, master_genre_list.append({
'tag': genre,
'file': tag_file, 'file': tag_file,
'authors': unique_authors, 'authors': unique_authors,
'books': genre_tag_set[genre], 'books': genre_tag_set[genre],
@ -2935,12 +2975,10 @@ class CatalogBuilder(object):
contentTag = Tag(soup, 'content') contentTag = Tag(soup, 'content')
contentTag['src'] = "content/ByDateAdded.html" contentTag['src'] = "content/ByDateAdded.html"
navPointTag.insert(1, contentTag) navPointTag.insert(1, contentTag)
else: elif self.opts.generate_descriptions:
# Descriptions only # Descriptions only
sort_descriptions_by = self.books_by_author if self.opts.sort_descriptions_by_author \
else self.books_by_title
contentTag = Tag(soup, 'content') contentTag = Tag(soup, 'content')
contentTag['src'] = "content/book_%d.html" % int(sort_descriptions_by[0]['id']) contentTag['src'] = "content/book_%d.html" % int(self.books_by_description[0]['id'])
navPointTag.insert(1, contentTag) navPointTag.insert(1, contentTag)
if self.generate_for_kindle_mobi: if self.generate_for_kindle_mobi:
@ -2970,9 +3008,6 @@ class CatalogBuilder(object):
self.update_progress_full_step(_("NCX for Descriptions")) self.update_progress_full_step(_("NCX for Descriptions"))
sort_descriptions_by = self.books_by_author if self.opts.sort_descriptions_by_author \
else self.books_by_title
# --- Construct the 'Descriptions' section --- # --- Construct the 'Descriptions' section ---
ncx_soup = self.ncx_soup ncx_soup = self.ncx_soup
if self.generate_for_kindle_mobi: if self.generate_for_kindle_mobi:
@ -2990,19 +3025,22 @@ class CatalogBuilder(object):
self.play_order += 1 self.play_order += 1
navLabelTag = Tag(ncx_soup, 'navLabel') navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text') textTag = Tag(ncx_soup, 'text')
textTag.insert(0, NavigableString(tocTitle)) section_header = '%s [%d]' % (tocTitle, len(self.books_by_description))
if self.generate_for_kindle_mobi:
section_header = tocTitle
textTag.insert(0, NavigableString(section_header))
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
nptc = 0 nptc = 0
navPointTag.insert(nptc, navLabelTag) navPointTag.insert(nptc, navLabelTag)
nptc += 1 nptc += 1
contentTag = Tag(ncx_soup, "content") contentTag = Tag(ncx_soup, "content")
contentTag['src'] = "content/book_%d.html" % int(sort_descriptions_by[0]['id']) contentTag['src'] = "content/book_%d.html" % int(self.books_by_description[0]['id'])
navPointTag.insert(nptc, contentTag) navPointTag.insert(nptc, contentTag)
nptc += 1 nptc += 1
# Loop over the titles # Loop over the titles
for book in sort_descriptions_by: for book in self.books_by_description:
navPointVolumeTag = Tag(ncx_soup, 'navPoint') navPointVolumeTag = Tag(ncx_soup, 'navPoint')
if self.generate_for_kindle_mobi: if self.generate_for_kindle_mobi:
navPointVolumeTag['class'] = "article" navPointVolumeTag['class'] = "article"
@ -3119,7 +3157,10 @@ class CatalogBuilder(object):
self.play_order += 1 self.play_order += 1
navLabelTag = Tag(ncx_soup, 'navLabel') navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text') textTag = Tag(ncx_soup, 'text')
textTag.insert(0, NavigableString(tocTitle)) section_header = '%s [%d]' % (tocTitle, len(self.all_series))
if self.generate_for_kindle_mobi:
section_header = tocTitle
textTag.insert(0, NavigableString(section_header))
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
nptc = 0 nptc = 0
navPointTag.insert(nptc, navLabelTag) navPointTag.insert(nptc, navLabelTag)
@ -3247,7 +3288,10 @@ class CatalogBuilder(object):
self.play_order += 1 self.play_order += 1
navLabelTag = Tag(ncx_soup, 'navLabel') navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text') textTag = Tag(ncx_soup, 'text')
textTag.insert(0, NavigableString(tocTitle)) section_header = '%s [%d]' % (tocTitle, len(self.books_by_title))
if self.generate_for_kindle_mobi:
section_header = tocTitle
textTag.insert(0, NavigableString(section_header))
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
nptc = 0 nptc = 0
navPointTag.insert(nptc, navLabelTag) navPointTag.insert(nptc, navLabelTag)
@ -3377,7 +3421,10 @@ class CatalogBuilder(object):
self.play_order += 1 self.play_order += 1
navLabelTag = Tag(ncx_soup, 'navLabel') navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text') textTag = Tag(ncx_soup, 'text')
textTag.insert(0, NavigableString('%s' % tocTitle)) section_header = '%s [%d]' % (tocTitle, len(self.individual_authors))
if self.generate_for_kindle_mobi:
section_header = tocTitle
textTag.insert(0, NavigableString(section_header))
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
nptc = 0 nptc = 0
navPointTag.insert(nptc, navLabelTag) navPointTag.insert(nptc, navLabelTag)
@ -3430,7 +3477,7 @@ class CatalogBuilder(object):
fmt_string = _(u"Authors beginning with %s") fmt_string = _(u"Authors beginning with %s")
else: else:
fmt_string = _(u"Authors beginning with '%s'") fmt_string = _(u"Authors beginning with '%s'")
textTag.insert(0, NavigableString(fmt_string % (authors_by_letter[1]))) textTag.insert(0, NavigableString(fmt_string % authors_by_letter[1]))
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
navPointByLetterTag.insert(0, navLabelTag) navPointByLetterTag.insert(0, navLabelTag)
contentTag = Tag(ncx_soup, 'content') contentTag = Tag(ncx_soup, 'content')
@ -3808,7 +3855,7 @@ class CatalogBuilder(object):
self.update_progress_full_step(_("NCX for Genres")) self.update_progress_full_step(_("NCX for Genres"))
if not len(self.genres): if not len(self.genres):
self.opts.log.warn(" No genres found in tags.\n" self.opts.log.warn(" No genres found\n"
" No Genre section added to Catalog") " No Genre section added to Catalog")
return return
@ -3830,8 +3877,10 @@ class CatalogBuilder(object):
self.play_order += 1 self.play_order += 1
navLabelTag = Tag(ncx_soup, 'navLabel') navLabelTag = Tag(ncx_soup, 'navLabel')
textTag = Tag(ncx_soup, 'text') textTag = Tag(ncx_soup, 'text')
# textTag.insert(0, NavigableString('%s (%d)' % (section_title, len(genre_list)))) section_header = '%s [%d]' % (tocTitle, len(self.genres))
textTag.insert(0, NavigableString('%s' % tocTitle)) if self.generate_for_kindle_mobi:
section_header = tocTitle
textTag.insert(0, NavigableString(section_header))
navLabelTag.insert(0, textTag) navLabelTag.insert(0, textTag)
nptc = 0 nptc = 0
navPointTag.insert(nptc, navLabelTag) navPointTag.insert(nptc, navLabelTag)
@ -3993,7 +4042,6 @@ class CatalogBuilder(object):
mtc += 1 mtc += 1
# Write the thumbnail images, descriptions to the manifest # Write the thumbnail images, descriptions to the manifest
sort_descriptions_by = []
if self.opts.generate_descriptions: if self.opts.generate_descriptions:
for thumb in self.thumbs: for thumb in self.thumbs:
itemTag = Tag(soup, "item") itemTag = Tag(soup, "item")
@ -4004,9 +4052,6 @@ class CatalogBuilder(object):
manifest.insert(mtc, itemTag) manifest.insert(mtc, itemTag)
mtc += 1 mtc += 1
# HTML files - add descriptions to manifest and spine
sort_descriptions_by = self.books_by_author if self.opts.sort_descriptions_by_author \
else self.books_by_title
# Add html_files to manifest and spine # Add html_files to manifest and spine
for file in self.html_filelist_1: for file in self.html_filelist_1:
@ -4060,7 +4105,7 @@ class CatalogBuilder(object):
spine.insert(stc, itemrefTag) spine.insert(stc, itemrefTag)
stc += 1 stc += 1
for book in sort_descriptions_by: for book in self.books_by_description:
# manifest # manifest
itemTag = Tag(soup, "item") itemTag = Tag(soup, "item")
itemTag['href'] = "content/book_%d.html" % int(book['id']) itemTag['href'] = "content/book_%d.html" % int(book['id'])
@ -4286,7 +4331,8 @@ class CatalogBuilder(object):
f.write(thumb_data) f.write(thumb_data)
# Save thumb to archive # Save thumb to archive
if zf is not None: # Ensure that the read succeeded if zf is not None:
# Ensure that the read succeeded
# If we failed to open the zip file for reading, # If we failed to open the zip file for reading,
# we dont know if it contained the thumb or not # we dont know if it contained the thumb or not
zf = _open_archive('a') zf = _open_archive('a')
@ -4363,7 +4409,6 @@ class CatalogBuilder(object):
# Clear the book's cover property # Clear the book's cover property
title['cover'] = None title['cover'] = None
# Write thumb_width to the file, validating cache contents # Write thumb_width to the file, validating cache contents
# Allows detection of aborted catalog builds # Allows detection of aborted catalog builds
with ZipFile(self.thumbs_path, mode='a') as zfw: with ZipFile(self.thumbs_path, mode='a') as zfw:
@ -4853,5 +4898,3 @@ class CatalogBuilder(object):
outfile = open("%s/%s.ncx" % (self.catalog_path, self.opts.basename), 'w') outfile = open("%s/%s.ncx" % (self.catalog_path, self.opts.basename), 'w')
outfile.write(self.ncx_soup.prettify()) outfile.write(self.ncx_soup.prettify())

View File

@ -22,6 +22,7 @@ from calibre.library.comments import comments_to_html
from calibre.library.server import custom_fields_to_display from calibre.library.server import custom_fields_to_display
from calibre.library.field_metadata import category_icon_map from calibre.library.field_metadata import category_icon_map
from calibre.library.server.utils import quote, unquote from calibre.library.server.utils import quote, unquote
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
def xml(*args, **kwargs): def xml(*args, **kwargs):
ans = prepare_string_for_xml(*args, **kwargs) ans = prepare_string_for_xml(*args, **kwargs)
@ -823,6 +824,16 @@ class BrowseServer(object):
if field in ('title', 'formats') or not args.get(field, False) \ if field in ('title', 'formats') or not args.get(field, False) \
or not m['name']: or not m['name']:
continue continue
if field == 'identifiers':
urls = urls_from_identifiers(mi.get(field, {}))
links = [u'<a class="details_category_link" target="_new" href="%s" title="%s:%s">%s</a>' % (url, id_typ, id_val, name)
for name, id_typ, id_val, url in urls]
links = u', '.join(links)
if links:
fields.append((m['name'], u'<strong>%s: </strong>%s'%(
_('Ids'), links)))
continue
if m['datatype'] == 'rating': if m['datatype'] == 'rating':
r = u'<strong>%s: </strong>'%xml(m['name']) + \ r = u'<strong>%s: </strong>'%xml(m['name']) + \
render_rating(mi.get(field)/2.0, self.opts.url_prefix, render_rating(mi.get(field)/2.0, self.opts.url_prefix,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More