mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
0.9.8+
This commit is contained in:
commit
69f8b36eae
@ -20,6 +20,7 @@ class Aksiyon (BasicNewsRecipe):
|
||||
auto_cleanup = True
|
||||
cover_img_url = 'http://www.aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
masthead_url = 'http://aksiyon.com.tr/aksiyon/images/aksiyon/top-page/aksiyon_top_r2_c1.jpg'
|
||||
ignore_duplicate_articles = { 'title', 'url' }
|
||||
remove_empty_feeds= True
|
||||
feeds = [
|
||||
( u'KAPAK', u'http://www.aksiyon.com.tr/aksiyon/rss?sectionId=26'),
|
||||
|
@ -21,10 +21,11 @@ class Engadget(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
|
||||
remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
|
||||
remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
|
||||
#keep_only_tags = [dict(name='div', attrs={'class':['post_content permalink ','post_content permalink alt-post-full']})]
|
||||
#remove_tags = [dict(name='div', attrs={'class':['filed_under','post_footer']})]
|
||||
#remove_tags_after = [dict(name='div', attrs={'class':['post_footer']})]
|
||||
|
||||
feeds = [(u'Posts', u'http://www.engadget.com/rss.xml')]
|
||||
|
||||
|
@ -6,22 +6,41 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
nytimes.com
|
||||
'''
|
||||
import re, string, time
|
||||
from calibre import entity_to_unicode, strftime
|
||||
from calibre import strftime
|
||||
from datetime import timedelta, date
|
||||
from time import sleep
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
class NYTimes(BasicNewsRecipe):
|
||||
|
||||
recursions=1 # set this to zero to omit Related articles lists
|
||||
|
||||
# set getTechBlogs to True to include the technology blogs
|
||||
# set tech_oldest_article to control article age
|
||||
# set tech_max_articles_per_feed to control article count
|
||||
getTechBlogs = True
|
||||
remove_empty_feeds = True
|
||||
tech_oldest_article = 14
|
||||
tech_max_articles_per_feed = 25
|
||||
|
||||
|
||||
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
|
||||
headlinesOnly = True
|
||||
|
||||
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
|
||||
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
|
||||
# will be included. Note: oldest_article is ignored if webEdition = False
|
||||
# set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the
|
||||
# number of days old an article can be for inclusion. If oldest_web_article = None all articles
|
||||
# will be included. Note: oldest_web_article is ignored if webEdition = False
|
||||
webEdition = False
|
||||
oldest_article = 7
|
||||
oldest_web_article = 7
|
||||
|
||||
# download higher resolution images than the small thumbnails typically included in the article
|
||||
# the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
|
||||
useHighResImages = True
|
||||
|
||||
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
|
||||
# previous paid versions of the New York Times to be sent to the back issues folder on the Kindle
|
||||
replaceKindleVersion = False
|
||||
|
||||
# includeSections: List of sections to include. If empty, all sections found will be included.
|
||||
# Otherwise, only the sections named will be included. For example,
|
||||
@ -82,57 +101,68 @@ class NYTimes(BasicNewsRecipe):
|
||||
('Education',u'education'),
|
||||
('Multimedia',u'multimedia'),
|
||||
(u'Obituaries',u'obituaries'),
|
||||
(u'Sunday Magazine',u'magazine'),
|
||||
(u'Week in Review',u'weekinreview')]
|
||||
(u'Sunday Magazine',u'magazine')
|
||||
]
|
||||
|
||||
tech_feeds = [
|
||||
(u'Tech - Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
|
||||
(u'Tech - Bits', u'http://bits.blogs.nytimes.com/feed/'),
|
||||
(u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
|
||||
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
|
||||
]
|
||||
|
||||
|
||||
if headlinesOnly:
|
||||
title='New York Times Headlines'
|
||||
description = 'Headlines from the New York Times. Needs a subscription from http://www.nytimes.com'
|
||||
needs_subscription = 'optional'
|
||||
description = 'Headlines from the New York Times'
|
||||
needs_subscription = False
|
||||
elif webEdition:
|
||||
title='New York Times (Web)'
|
||||
description = 'New York Times on the Web'
|
||||
needs_subscription = True
|
||||
needs_subscription = False
|
||||
elif replaceKindleVersion:
|
||||
title='The New York Times'
|
||||
description = 'Today\'s New York Times'
|
||||
needs_subscription = False
|
||||
else:
|
||||
title='New York Times'
|
||||
description = 'Today\'s New York Times'
|
||||
needs_subscription = True
|
||||
needs_subscription = False
|
||||
|
||||
|
||||
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
|
||||
|
||||
def decode_us_date(self,datestr):
|
||||
udate = datestr.strip().lower().split()
|
||||
def decode_url_date(self,url):
|
||||
urlitems = url.split('/')
|
||||
try:
|
||||
m = self.month_list.index(udate[0])+1
|
||||
d = date(int(urlitems[3]),int(urlitems[4]),int(urlitems[5]))
|
||||
except:
|
||||
return date.today()
|
||||
d = int(udate[1])
|
||||
y = int(udate[2])
|
||||
try:
|
||||
d = date(y,m,d)
|
||||
d = date(int(urlitems[4]),int(urlitems[5]),int(urlitems[6]))
|
||||
except:
|
||||
d = date.today
|
||||
return None
|
||||
return d
|
||||
|
||||
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||
if oldest_web_article is None:
|
||||
earliest_date = date.today()
|
||||
else:
|
||||
earliest_date = date.today() - timedelta(days=oldest_web_article)
|
||||
oldest_article = 365 # by default, a long time ago
|
||||
|
||||
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
|
||||
language = 'en'
|
||||
requires_version = (0, 7, 5)
|
||||
|
||||
encoding = 'utf-8'
|
||||
|
||||
timefmt = ''
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
||||
|
||||
simultaneous_downloads = 1
|
||||
|
||||
cover_margins = (18,18,'grey99')
|
||||
|
||||
remove_tags_before = dict(id='article')
|
||||
remove_tags_after = dict(id='article')
|
||||
remove_tags = [dict(attrs={'class':[
|
||||
remove_tags = [
|
||||
dict(attrs={'class':[
|
||||
'articleFooter',
|
||||
'articleTools',
|
||||
'columnGroup doubleRule',
|
||||
'columnGroup singleRule',
|
||||
'columnGroup last',
|
||||
'columnGroup last',
|
||||
@ -140,7 +170,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
'dottedLine',
|
||||
'entry-meta',
|
||||
'entry-response module',
|
||||
'icon enlargeThis',
|
||||
'leftNavTabs',
|
||||
'metaFootnote',
|
||||
'module box nav',
|
||||
@ -150,10 +179,43 @@ class NYTimes(BasicNewsRecipe):
|
||||
'relatedSearchesModule',
|
||||
'side_tool',
|
||||
'singleAd',
|
||||
'entry entry-utility', #added for DealBook
|
||||
'entry-tags', #added for DealBook
|
||||
'footer promos clearfix', #added for DealBook
|
||||
'footer links clearfix', #added for DealBook
|
||||
'tabsContainer', #added for other blog downloads
|
||||
'column lastColumn', #added for other blog downloads
|
||||
'pageHeaderWithLabel', #added for other gadgetwise downloads
|
||||
'column two', #added for other blog downloads
|
||||
'column two last', #added for other blog downloads
|
||||
'column three', #added for other blog downloads
|
||||
'column three last', #added for other blog downloads
|
||||
'column four',#added for other blog downloads
|
||||
'column four last',#added for other blog downloads
|
||||
'column last', #added for other blog downloads
|
||||
'entry entry-related',
|
||||
'subNavigation tabContent active', #caucus blog navigation
|
||||
'mediaOverlay slideshow',
|
||||
'wideThumb',
|
||||
'video', #added 02-11-2011
|
||||
'videoHeader',#added 02-11-2011
|
||||
'articleInlineVideoHolder', #added 02-11-2011
|
||||
'assetCompanionAd',
|
||||
re.compile('^subNavigation'),
|
||||
re.compile('^leaderboard'),
|
||||
re.compile('^module'),
|
||||
re.compile('commentCount')
|
||||
]}),
|
||||
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
|
||||
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
|
||||
dict(name='div', attrs={'class':'tweet'}),
|
||||
dict(name='span', attrs={'class':'commentCount meta'}),
|
||||
dict(name='div', attrs={'id':'header'}),
|
||||
dict(name='div', attrs={'id':re.compile('commentsContainer')}), # bits, pogue, gadgetwise, open
|
||||
dict(name='ul', attrs={'class':re.compile('entry-tools')}), # pogue, gadgetwise
|
||||
dict(name='div', attrs={'class':re.compile('nocontent')}), # pogue, gadgetwise
|
||||
dict(name='div', attrs={'id':re.compile('respond')}), # open
|
||||
dict(name='div', attrs={'class':re.compile('entry-tags')}), # pogue
|
||||
dict(id=[
|
||||
'adxLeaderboard',
|
||||
'adxSponLink',
|
||||
@ -183,22 +245,29 @@ class NYTimes(BasicNewsRecipe):
|
||||
'side_index',
|
||||
'side_tool',
|
||||
'toolsRight',
|
||||
'skybox', #added for DealBook
|
||||
'TopAd', #added for DealBook
|
||||
'related-content', #added for DealBook
|
||||
]),
|
||||
dict(name=['script', 'noscript', 'style','form','hr'])]
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
|
||||
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.timestamp { text-align: left; font-size: small; }
|
||||
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.timestamp { font-weight: normal; text-align: left; font-size: 50%; }
|
||||
.caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
a:link {text-decoration: none; }
|
||||
.date{font-size: 50%; }
|
||||
.update{font-size: 50%; }
|
||||
.articleBody { }
|
||||
.authorId {text-align: left; }
|
||||
.authorId {text-align: left; font-size: 50%; }
|
||||
.image {text-align: center;}
|
||||
.source {text-align: left; }'''
|
||||
.aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;}
|
||||
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
|
||||
.source {text-align: left; font-size: x-small; }'''
|
||||
|
||||
|
||||
articles = {}
|
||||
@ -237,7 +306,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
def exclude_url(self,url):
|
||||
if not url.startswith("http"):
|
||||
return True
|
||||
if not url.endswith(".html"):
|
||||
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
|
||||
return True
|
||||
if 'nytimes.com' not in url:
|
||||
return True
|
||||
@ -280,88 +349,91 @@ class NYTimes(BasicNewsRecipe):
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.nytimes.com/auth/login')
|
||||
br.form = br.forms().next()
|
||||
br['userid'] = self.username
|
||||
br['password'] = self.password
|
||||
raw = br.submit().read()
|
||||
if 'Please try again' in raw:
|
||||
raise Exception('Your username and password are incorrect')
|
||||
return br
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
# Skip ad pages served before actual article
|
||||
skip_tag = soup.find(True, {'name':'skip'})
|
||||
if skip_tag is not None:
|
||||
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
|
||||
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
|
||||
url += '?pagewanted=all'
|
||||
self.log.warn("Skipping ad to article at '%s'" % url)
|
||||
return self.index_to_soup(url, raw=True)
|
||||
## This doesn't work (and probably never did). It either gets another serve of the advertisement,
|
||||
## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
|
||||
##
|
||||
## def skip_ad_pages(self, soup):
|
||||
## # Skip ad pages served before actual article
|
||||
## skip_tag = soup.find(True, {'name':'skip'})
|
||||
## if skip_tag is not None:
|
||||
## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
|
||||
## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
|
||||
## url += '?pagewanted=all'
|
||||
## self.log.warn("Skipping ad to article at '%s'" % url)
|
||||
## return self.index_to_soup(url, raw=True)
|
||||
|
||||
|
||||
cover_tag = 'NY_NYT'
|
||||
def get_cover_url(self):
|
||||
cover = None
|
||||
st = time.localtime()
|
||||
year = str(st.tm_year)
|
||||
month = "%.2d" % st.tm_mon
|
||||
day = "%.2d" % st.tm_mday
|
||||
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg'
|
||||
from datetime import timedelta, date
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
daysback=1
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
while daysback<7:
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
daysback = daysback+1
|
||||
continue
|
||||
break
|
||||
if daysback==7:
|
||||
self.log("\nCover unavailable")
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
||||
|
||||
def short_title(self):
|
||||
return self.title
|
||||
|
||||
def index_to_soup(self, url_or_raw, raw=False):
|
||||
'''
|
||||
OVERRIDE of class method
|
||||
deals with various page encodings between index and articles
|
||||
'''
|
||||
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||
|
||||
def article_to_soup(self, url_or_raw, raw=False):
|
||||
from contextlib import closing
|
||||
import copy
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
if re.match(r'\w+://', url_or_raw):
|
||||
br = self.clone_browser(self.browser)
|
||||
f = br.open_novisit(url_or_raw)
|
||||
open_func = getattr(br, 'open_novisit', br.open)
|
||||
with closing(open_func(url_or_raw)) as f:
|
||||
_raw = f.read()
|
||||
f.close()
|
||||
if not _raw:
|
||||
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
|
||||
else:
|
||||
_raw = url_or_raw
|
||||
if raw:
|
||||
return _raw
|
||||
|
||||
if not isinstance(_raw, unicode) and self.encoding:
|
||||
_raw = _raw.decode(docEncoding, 'replace')
|
||||
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
||||
return BeautifulSoup(_raw, markupMassage=massage)
|
||||
if callable(self.encoding):
|
||||
_raw = self.encoding(_raw)
|
||||
else:
|
||||
_raw = _raw.decode(self.encoding, 'replace')
|
||||
|
||||
# Entry point
|
||||
soup = get_the_soup( self.encoding, url_or_raw )
|
||||
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
||||
if docEncoding == '' :
|
||||
docEncoding = self.encoding
|
||||
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
|
||||
nmassage.extend(self.preprocess_regexps)
|
||||
nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')]
|
||||
# Some websites have buggy doctype declarations that mess up beautifulsoup
|
||||
# Remove comments as they can leave detritus when extracting tags leaves
|
||||
# multiple nested comments
|
||||
nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
|
||||
usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0]
|
||||
usrc = self.preprocess_raw_html(usrc, url_or_raw)
|
||||
return BeautifulSoup(usrc, markupMassage=nmassage)
|
||||
|
||||
if self.verbose > 2:
|
||||
self.log( " document encoding: '%s'" % docEncoding)
|
||||
if docEncoding != self.encoding :
|
||||
soup = get_the_soup(docEncoding, url_or_raw)
|
||||
|
||||
return soup
|
||||
|
||||
def massageNCXText(self, description):
|
||||
# Kindle TOC descriptions won't render certain characters
|
||||
if description:
|
||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||
# Replace '&' with '&'
|
||||
massaged = re.sub("&","&", massaged)
|
||||
massaged = re.sub("&","&", massaged)
|
||||
massaged = re.sub("&","&", massaged)
|
||||
return self.fixChars(massaged)
|
||||
else:
|
||||
return description
|
||||
@ -383,6 +455,16 @@ class NYTimes(BasicNewsRecipe):
|
||||
if self.filterDuplicates:
|
||||
if url in self.url_list:
|
||||
return
|
||||
if self.webEdition:
|
||||
date_tag = self.decode_url_date(url)
|
||||
if date_tag is not None:
|
||||
if self.oldest_web_article is not None:
|
||||
if date_tag < self.earliest_date:
|
||||
self.log("Skipping article %s" % url)
|
||||
return
|
||||
else:
|
||||
self.log("Skipping article %s" % url)
|
||||
return
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
@ -407,6 +489,31 @@ class NYTimes(BasicNewsRecipe):
|
||||
description=description, author=author,
|
||||
content=''))
|
||||
|
||||
def get_tech_feeds(self,ans):
    """Append the technology-blog sections to ``ans`` and return it.

    When ``self.getTechBlogs`` is false, ``ans`` is returned untouched.
    Otherwise the feeds listed in ``self.tech_feeds`` are fetched through
    ``self.parse_feeds()`` and one ``(feed title, [article dict, ...])``
    tuple per feed is appended to ``ans`` in feed order.

    ``ans`` is modified in place; the same list object is returned.
    """
    if not self.getTechBlogs:
        return ans

    # The blog feeds use their own age/count limits, so the recipe-wide
    # settings are swapped out around the parse_feeds() call
    # (presumably parse_feeds() reads these attributes -- confirm against
    # BasicNewsRecipe).
    save_oldest_article = self.oldest_article
    save_max_articles_per_feed = self.max_articles_per_feed
    self.oldest_article = self.tech_oldest_article
    self.max_articles_per_feed = self.tech_max_articles_per_feed
    self.feeds = self.tech_feeds
    try:
        tech = self.parse_feeds()
    finally:
        # Restore even when parse_feeds() raises, so a failed blog download
        # cannot leave the tech limits applied to the rest of the recipe.
        self.oldest_article = save_oldest_article
        self.max_articles_per_feed = save_max_articles_per_feed
        # feeds is reset to None, not to its previous value.
        self.feeds = None

    key_list = []       # feed titles, in the order the feeds were returned
    tech_articles = {}  # feed title -> list of article dicts
    for f in tech:
        key_list.append(f.title)
        tech_articles[f.title] = [
            dict(title=a.title, url=a.url, date=a.date,
                 description=a.summary, author=a.author,
                 content=a.content)
            for a in f.articles]

    # 'in' replaces dict.has_key(), which no longer exists in Python 3.
    tech_ans = [(k, tech_articles[k]) for k in key_list if k in tech_articles]
    ans.extend(tech_ans)
    return ans
|
||||
|
||||
def parse_web_edition(self):
|
||||
|
||||
@ -418,31 +525,41 @@ class NYTimes(BasicNewsRecipe):
|
||||
if sec_title in self.excludeSections:
|
||||
print "SECTION EXCLUDED: ",sec_title
|
||||
continue
|
||||
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
|
||||
try:
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
|
||||
except:
|
||||
continue
|
||||
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
|
||||
|
||||
self.key = sec_title
|
||||
# Find each article
|
||||
for div in soup.findAll(True,
|
||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
if div['class'] in ['story', 'story headline'] :
|
||||
attrs={'class':['section-headline', 'ledeStory', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
if div['class'] in ['story', 'story headline', 'storyHeader'] :
|
||||
self.handle_article(div)
|
||||
elif div['class'] == 'ledeStory':
|
||||
divsub = div.find('div','storyHeader')
|
||||
if divsub is not None:
|
||||
self.handle_article(divsub)
|
||||
ulrefer = div.find('ul','refer')
|
||||
if ulrefer is not None:
|
||||
for lidiv in ulrefer.findAll('li'):
|
||||
self.handle_article(lidiv)
|
||||
elif div['class'] == 'headlinesOnly multiline flush':
|
||||
for lidiv in div.findAll('li'):
|
||||
self.handle_article(lidiv)
|
||||
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
return self.filter_ans(self.get_tech_feeds(self.ans))
|
||||
|
||||
|
||||
def parse_todays_index(self):
|
||||
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
||||
|
||||
skipping = False
|
||||
# Find each article
|
||||
for div in soup.findAll(True,
|
||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
|
||||
if div['class'] in ['section-headline','sectionHeader']:
|
||||
self.key = string.capwords(self.feed_title(div))
|
||||
self.key = self.key.replace('Op-ed','Op-Ed')
|
||||
@ -466,7 +583,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.handle_article(lidiv)
|
||||
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
return self.filter_ans(self.get_tech_feeds(self.ans))
|
||||
|
||||
def parse_headline_index(self):
|
||||
|
||||
@ -514,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
for h3_item in search_div.findAll('h3'):
|
||||
byline = h3_item.h6
|
||||
if byline is not None:
|
||||
author = self.tag_to_string(byline,usa_alt=False)
|
||||
author = self.tag_to_string(byline,use_alt=False)
|
||||
else:
|
||||
author = ''
|
||||
a = h3_item.find('a', href=True)
|
||||
@ -540,7 +657,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
return self.filter_ans(self.get_tech_feeds(self.ans))
|
||||
|
||||
def parse_index(self):
|
||||
if self.headlinesOnly:
|
||||
@ -550,32 +667,190 @@ class NYTimes(BasicNewsRecipe):
|
||||
else:
|
||||
return self.parse_todays_index()
|
||||
|
||||
def strip_anchors(self,soup):
|
||||
def strip_anchors(self,soup,kill_all=False):
|
||||
paras = soup.findAll(True)
|
||||
for para in paras:
|
||||
aTags = para.findAll('a')
|
||||
for a in aTags:
|
||||
if a.img is None:
|
||||
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||
if kill_all or (self.recursions==0):
|
||||
a.replaceWith(self.tag_to_string(a,False))
|
||||
else:
|
||||
if a.has_key('href'):
|
||||
if a['href'].startswith('http://www.nytimes'):
|
||||
if not a['href'].endswith('pagewanted=all'):
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if self.exclude_url(url):
|
||||
a.replaceWith(self.tag_to_string(a,False))
|
||||
else:
|
||||
a['href'] = url+'?pagewanted=all'
|
||||
elif not (a['href'].startswith('http://pogue') or \
|
||||
a['href'].startswith('http://bits') or \
|
||||
a['href'].startswith('http://travel') or \
|
||||
a['href'].startswith('http://business') or \
|
||||
a['href'].startswith('http://tech') or \
|
||||
a['href'].startswith('http://health') or \
|
||||
a['href'].startswith('http://dealbook') or \
|
||||
a['href'].startswith('http://open')):
|
||||
a.replaceWith(self.tag_to_string(a,False))
|
||||
return soup
|
||||
|
||||
def handle_tags(self,soup):
    """Apply the recipe's tag filters to ``soup`` and return it.

    Applies, in this order: ``keep_only_tags``, ``remove_tags_after``,
    ``remove_tags_before`` and ``remove_tags``. ``preprocess_html`` calls
    this on the page it fetches itself after following an ad-forwarding
    link.

    Returns the modified ``soup`` (edited in place), or None when ``soup``
    is None.
    """
    try:
        print("HANDLE TAGS: TITLE = "+self.tag_to_string(soup.title))
    except Exception:
        # Covers a missing title as well as soup being None; narrowed from
        # a bare except so KeyboardInterrupt/SystemExit still propagate.
        print("HANDLE TAGS: NO TITLE")
    if soup is None:
        print("ERROR: handle_tags received NoneType")
        return None

    if self.keep_only_tags:
        # Build a fresh <body> that holds only the matching tags.
        body = Tag(soup, 'body')
        try:
            if isinstance(self.keep_only_tags, dict):
                self.keep_only_tags = [self.keep_only_tags]
            for spec in self.keep_only_tags:
                for tag in soup.find('body').findAll(**spec):
                    body.insert(len(body.contents), tag)
            soup.find('body').replaceWith(body)
        except AttributeError: # soup has no body element
            pass

    def remove_beyond(tag, direction):
        # Extract every sibling on the ``direction`` side ('nextSibling' or
        # 'previousSibling') of ``tag`` and of each of its ancestors, up to
        # but not including <body>.
        while tag is not None and getattr(tag, 'name', None) != 'body':
            after = getattr(tag, direction)
            while after is not None:
                # Statement order matters: the sibling is read before the
                # extract, exactly as in the original loop.
                ns = getattr(tag, direction)
                after.extract()
                after = ns
            tag = tag.parent

    def as_spec_list(value):
        # A single spec dict and a list of spec dicts are both accepted.
        return [value] if isinstance(value, dict) else value

    if self.remove_tags_after is not None:
        for spec in as_spec_list(self.remove_tags_after):
            remove_beyond(soup.find(**spec), 'nextSibling')

    if self.remove_tags_before is not None:
        # Now accepts a list as well as a dict, matching remove_tags_after.
        for spec in as_spec_list(self.remove_tags_before):
            remove_beyond(soup.find(**spec), 'previousSibling')

    for kwds in self.remove_tags:
        for tag in soup.findAll(**kwds):
            tag.extract()

    return soup
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
|
||||
skip_tag = soup.find(True, {'name':'skip'})
|
||||
if skip_tag is not None:
|
||||
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
|
||||
url += '?pagewanted=all'
|
||||
self.log.warn("Skipping ad to article at '%s'" % url)
|
||||
sleep(5)
|
||||
soup = self.handle_tags(self.article_to_soup(url))
|
||||
|
||||
if self.webEdition & (self.oldest_article>0):
|
||||
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
|
||||
if date_tag:
|
||||
date_str = self.tag_to_string(date_tag,use_alt=False)
|
||||
date_str = date_str.replace('Published:','')
|
||||
date_items = date_str.split(',')
|
||||
try:
|
||||
datestring = date_items[0]+' '+date_items[1]
|
||||
article_date = self.decode_us_date(datestring)
|
||||
except:
|
||||
article_date = date.today()
|
||||
if article_date < self.earliest_date:
|
||||
self.log("Skipping article dated %s" % date_str)
|
||||
return None
|
||||
# check if the article is from one of the tech blogs
|
||||
blog=soup.find('div',attrs={'id':['pogue','bits','gadgetwise','open']})
|
||||
|
||||
if blog is not None:
|
||||
old_body = soup.find('body')
|
||||
new_body=Tag(soup,'body')
|
||||
new_body.append(soup.find('div',attrs={'id':'content'}))
|
||||
new_body.find('div',attrs={'id':'content'})['id']='blogcontent' # identify for postprocess_html
|
||||
old_body.replaceWith(new_body)
|
||||
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
|
||||
if divr.find(text=re.compile('Sign up')):
|
||||
divr.extract()
|
||||
divr = soup.find('div',attrs={'id':re.compile('related-content')})
|
||||
if divr is not None:
|
||||
# handle related articles
|
||||
rlist = []
|
||||
ul = divr.find('ul')
|
||||
if ul is not None:
|
||||
for li in ul.findAll('li'):
|
||||
atag = li.find('a')
|
||||
if atag is not None:
|
||||
if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \
|
||||
atag['href'].startswith('http://open'):
|
||||
atag.find(text=True).replaceWith(self.massageNCXText(self.tag_to_string(atag,False)))
|
||||
rlist.append(atag)
|
||||
divr.extract()
|
||||
if rlist != []:
|
||||
asidediv = Tag(soup,'div',[('class','aside')])
|
||||
if soup.find('hr') is None:
|
||||
asidediv.append(Tag(soup,'hr'))
|
||||
h4 = Tag(soup,'h4',[('class','asidenote')])
|
||||
h4.insert(0,"Related Posts")
|
||||
asidediv.append(h4)
|
||||
ul = Tag(soup,'ul')
|
||||
for r in rlist:
|
||||
li = Tag(soup,'li',[('class','aside')])
|
||||
r['class'] = 'aside'
|
||||
li.append(r)
|
||||
ul.append(li)
|
||||
asidediv.append(ul)
|
||||
asidediv.append(Tag(soup,'hr'))
|
||||
smain = soup.find('body')
|
||||
smain.append(asidediv)
|
||||
for atag in soup.findAll('a'):
|
||||
img = atag.find('img')
|
||||
if img is not None:
|
||||
atag.replaceWith(img)
|
||||
elif not atag.has_key('href'):
|
||||
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
|
||||
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
|
||||
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
|
||||
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
|
||||
hdr = soup.find('address')
|
||||
if hdr is not None:
|
||||
hdr.name='span'
|
||||
for span_credit in soup.findAll('span','credit'):
|
||||
sp = Tag(soup,'span')
|
||||
span_credit.replaceWith(sp)
|
||||
sp.append(Tag(soup,'br'))
|
||||
sp.append(span_credit)
|
||||
sp.append(Tag(soup,'br'))
|
||||
|
||||
else: # nytimes article
|
||||
|
||||
related = [] # these will be the related articles
|
||||
first_outer = None # first related outer tag
|
||||
first_related = None # first related tag
|
||||
for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
|
||||
for rdiv in soup.findAll('div','columnGroup doubleRule'):
|
||||
if rdiv.find('h3') is not None:
|
||||
if self.tag_to_string(rdiv.h3,False).startswith('Related'):
|
||||
rdiv.h3.find(text=True).replaceWith("Related articles")
|
||||
rdiv.h3['class'] = 'asidenote'
|
||||
for litag in rdiv.findAll('li'):
|
||||
if litag.find('a') is not None:
|
||||
if litag.find('a')['href'].startswith('http://www.nytimes.com'):
|
||||
url = re.sub(r'\?.*', '', litag.find('a')['href'])
|
||||
litag.find('a')['href'] = url+'?pagewanted=all'
|
||||
litag.extract()
|
||||
related.append(litag)
|
||||
if first_related is None:
|
||||
first_related = rdiv
|
||||
first_outer = outerdiv
|
||||
else:
|
||||
litag.extract()
|
||||
if related != []:
|
||||
for r in related:
|
||||
if r.h6: # don't want the anchor inside a h6 tag
|
||||
r.h6.replaceWith(r.h6.a)
|
||||
first_related.ul.append(r)
|
||||
first_related.insert(0,Tag(soup,'hr'))
|
||||
first_related.append(Tag(soup,'hr'))
|
||||
first_related['class'] = 'aside'
|
||||
first_outer.replaceWith(first_related) # replace the outer tag with the related tag
|
||||
|
||||
for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
|
||||
rdiv.extract()
|
||||
|
||||
kicker_tag = soup.find(attrs={'class':'kicker'})
|
||||
if kicker_tag: # remove Op_Ed author head shots
|
||||
@ -584,9 +859,77 @@ class NYTimes(BasicNewsRecipe):
|
||||
img_div = soup.find('div','inlineImage module')
|
||||
if img_div:
|
||||
img_div.extract()
|
||||
return self.strip_anchors(soup)
|
||||
|
||||
def postprocess_html(self,soup, True):
|
||||
if self.useHighResImages:
|
||||
try:
|
||||
#open up all the "Enlarge this Image" pop-ups and download the full resolution jpegs
|
||||
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
|
||||
if enlargeThisList:
|
||||
for popupref in enlargeThisList:
|
||||
popupreflink = popupref.find('a')
|
||||
if popupreflink:
|
||||
reflinkstring = str(popupreflink['href'])
|
||||
refstart = reflinkstring.find("javascript:pop_me_up2('") + len("javascript:pop_me_up2('")
|
||||
refend = reflinkstring.find(".html", refstart) + len(".html")
|
||||
reflinkstring = reflinkstring[refstart:refend]
|
||||
|
||||
popuppage = self.browser.open(reflinkstring)
|
||||
popuphtml = popuppage.read()
|
||||
popuppage.close()
|
||||
if popuphtml:
|
||||
st = time.localtime()
|
||||
year = str(st.tm_year)
|
||||
month = "%.2d" % st.tm_mon
|
||||
day = "%.2d" % st.tm_mday
|
||||
imgstartpos = popuphtml.find('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/') + len('http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/')
|
||||
highResImageLink = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/' + popuphtml[imgstartpos:popuphtml.find('.jpg',imgstartpos)+4]
|
||||
popupSoup = BeautifulSoup(popuphtml)
|
||||
highResTag = popupSoup.find('img', {'src':highResImageLink})
|
||||
if highResTag:
|
||||
try:
|
||||
newWidth = highResTag['width']
|
||||
newHeight = highResTag['height']
|
||||
imageTag = popupref.parent.find("img")
|
||||
except:
|
||||
self.log("Error: finding width and height of img")
|
||||
popupref.extract()
|
||||
if imageTag:
|
||||
try:
|
||||
imageTag['src'] = highResImageLink
|
||||
imageTag['width'] = newWidth
|
||||
imageTag['height'] = newHeight
|
||||
except:
|
||||
self.log("Error setting the src width and height parameters")
|
||||
except Exception:
|
||||
self.log("Error pulling high resolution images")
|
||||
|
||||
try:
|
||||
#in case pulling images failed, delete the enlarge this text
|
||||
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
|
||||
if enlargeThisList:
|
||||
for popupref in enlargeThisList:
|
||||
popupref.extract()
|
||||
except:
|
||||
self.log("Error removing Enlarge this text")
|
||||
|
||||
|
||||
return self.strip_anchors(soup,False)
|
||||
|
||||
def postprocess_html(self,soup,first_fetch):
|
||||
if not first_fetch: # remove Related links
|
||||
for aside in soup.findAll('div','aside'):
|
||||
aside.extract()
|
||||
soup = self.strip_anchors(soup,True)
|
||||
|
||||
if soup.find('div',attrs={'id':'blogcontent'}) is None:
|
||||
if first_fetch:
|
||||
aside = soup.find('div','aside')
|
||||
if aside is not None: # move the related list to the end of the article
|
||||
art = soup.find('div',attrs={'id':'article'})
|
||||
if art is None:
|
||||
art = soup.find('div',attrs={'class':'article'})
|
||||
if art is not None:
|
||||
art.append(aside)
|
||||
try:
|
||||
if self.one_picture_per_article:
|
||||
# Remove all images after first
|
||||
@ -642,6 +985,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
try:
|
||||
# Change <nyt_headline> to <h2>
|
||||
h1 = soup.find('h1')
|
||||
blogheadline = str(h1) #added for dealbook
|
||||
if h1:
|
||||
headline = h1.find("nyt_headline")
|
||||
if headline:
|
||||
@ -649,13 +993,19 @@ class NYTimes(BasicNewsRecipe):
|
||||
tag['class'] = "headline"
|
||||
tag.insert(0, self.fixChars(headline.contents[0]))
|
||||
h1.replaceWith(tag)
|
||||
elif blogheadline.find('entry-title'):#added for dealbook
|
||||
tag = Tag(soup, "h2")#added for dealbook
|
||||
tag['class'] = "headline"#added for dealbook
|
||||
tag.insert(0, self.fixChars(h1.contents[0]))#added for dealbook
|
||||
h1.replaceWith(tag)#added for dealbook
|
||||
|
||||
else:
|
||||
# Blog entry - replace headline, remove <hr> tags
|
||||
# Blog entry - replace headline, remove <hr> tags - BCC I think this is no longer functional 1-18-2011
|
||||
headline = soup.find('title')
|
||||
if headline:
|
||||
tag = Tag(soup, "h2")
|
||||
tag['class'] = "headline"
|
||||
tag.insert(0, self.fixChars(headline.contents[0]))
|
||||
tag.insert(0, self.fixChars(headline.renderContents()))
|
||||
soup.insert(0, tag)
|
||||
hrs = soup.findAll('hr')
|
||||
for hr in hrs:
|
||||
@ -663,6 +1013,29 @@ class NYTimes(BasicNewsRecipe):
|
||||
except:
|
||||
self.log("ERROR: Problem in Change <nyt_headline> to <h2>")
|
||||
|
||||
try:
|
||||
#if this is from a blog (dealbook, fix the byline format
|
||||
bylineauthor = soup.find('address',attrs={'class':'byline author vcard'})
|
||||
if bylineauthor:
|
||||
tag = Tag(soup, "h6")
|
||||
tag['class'] = "byline"
|
||||
tag.insert(0, self.fixChars(bylineauthor.renderContents()))
|
||||
bylineauthor.replaceWith(tag)
|
||||
except:
|
||||
self.log("ERROR: fixing byline author format")
|
||||
|
||||
try:
|
||||
#if this is a blog (dealbook) fix the credit style for the pictures
|
||||
blogcredit = soup.find('div',attrs={'class':'credit'})
|
||||
if blogcredit:
|
||||
tag = Tag(soup, "h6")
|
||||
tag['class'] = "credit"
|
||||
tag.insert(0, self.fixChars(blogcredit.renderContents()))
|
||||
blogcredit.replaceWith(tag)
|
||||
except:
|
||||
self.log("ERROR: fixing credit format")
|
||||
|
||||
|
||||
try:
|
||||
# Change <h1> to <h3> - used in editorial blogs
|
||||
masthead = soup.find("h1")
|
||||
@ -685,6 +1058,13 @@ class NYTimes(BasicNewsRecipe):
|
||||
subhead.replaceWith(bTag)
|
||||
except:
|
||||
self.log("ERROR: Problem in Change <h1> to <h3> - used in editorial blogs")
|
||||
try:
|
||||
#remove the <strong> update tag
|
||||
blogupdated = soup.find('span', {'class':'update'})
|
||||
if blogupdated:
|
||||
blogupdated.replaceWith("")
|
||||
except:
|
||||
self.log("ERROR: Removing strong tag")
|
||||
|
||||
try:
|
||||
divTag = soup.find('div',attrs={'id':'articleBody'})
|
||||
@ -708,16 +1088,16 @@ class NYTimes(BasicNewsRecipe):
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
if not first:
|
||||
return
|
||||
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
|
||||
if idxdiv is not None:
|
||||
if idxdiv.img:
|
||||
self.add_toc_thumbnail(article, idxdiv.img['src'])
|
||||
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',idxdiv.img['src']))
|
||||
else:
|
||||
img = soup.find('img')
|
||||
img = soup.find('body').find('img')
|
||||
if img is not None:
|
||||
self.add_toc_thumbnail(article, img['src'])
|
||||
|
||||
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',img['src']))
|
||||
shortparagraph = ""
|
||||
try:
|
||||
if len(article.text_summary.strip()) == 0:
|
||||
@ -731,13 +1111,22 @@ class NYTimes(BasicNewsRecipe):
|
||||
#account for blank paragraphs and short paragraphs by appending them to longer ones
|
||||
if len(refparagraph) > 0:
|
||||
if len(refparagraph) > 70: #approximately one line of text
|
||||
article.summary = article.text_summary = shortparagraph + refparagraph
|
||||
newpara = shortparagraph + refparagraph
|
||||
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
|
||||
if newparaEm == '':
|
||||
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
|
||||
if newparaEm == '':
|
||||
newparaDesc = newparaDateline
|
||||
article.summary = article.text_summary = newparaDesc.strip()
|
||||
return
|
||||
else:
|
||||
shortparagraph = refparagraph + " "
|
||||
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
|
||||
shortparagraph = shortparagraph + "- "
|
||||
else:
|
||||
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
|
||||
except:
|
||||
self.log("Error creating article descriptions")
|
||||
return
|
||||
|
||||
|
||||
|
@ -6,31 +6,42 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
nytimes.com
|
||||
'''
|
||||
import re, string, time
|
||||
from calibre import entity_to_unicode, strftime
|
||||
from calibre import strftime
|
||||
from datetime import timedelta, date
|
||||
from time import sleep
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
class NYTimes(BasicNewsRecipe):
|
||||
|
||||
recursions=1 # set this to zero to omit Related articles lists
|
||||
|
||||
# set getTechBlogs to True to include the technology blogs
|
||||
# set tech_oldest_article to control article age
|
||||
# set tech_max_articles_per_feed to control article count
|
||||
getTechBlogs = True
|
||||
remove_empty_feeds = True
|
||||
tech_oldest_article = 14
|
||||
tech_max_articles_per_feed = 25
|
||||
|
||||
|
||||
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
|
||||
headlinesOnly = False
|
||||
|
||||
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
|
||||
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
|
||||
# will be included. Note: oldest_article is ignored if webEdition = False
|
||||
# set webEdition to True for the Web edition of the newspaper. Set oldest_web_article to the
|
||||
# number of days old an article can be for inclusion. If oldest_web_article = None all articles
|
||||
# will be included. Note: oldest_web_article is ignored if webEdition = False
|
||||
webEdition = False
|
||||
oldest_article = 7
|
||||
|
||||
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
|
||||
# previous paid versions of the new york times to be sent to the back issues folder on the kindle
|
||||
replaceKindleVersion = False
|
||||
oldest_web_article = 7
|
||||
|
||||
# download higher resolution images than the small thumbnails typically included in the article
|
||||
# the down side of having large beautiful images is the file size is much larger, on the order of 7MB per paper
|
||||
useHighResImages = True
|
||||
|
||||
# replace paid Kindle Version: the name will be changed to "The New York Times" to cause
|
||||
# previous paid versions of the new york times to be sent to the back issues folder on the kindle
|
||||
replaceKindleVersion = False
|
||||
|
||||
# includeSections: List of sections to include. If empty, all sections found will be included.
|
||||
# Otherwise, only the sections named will be included. For example,
|
||||
#
|
||||
@ -90,60 +101,68 @@ class NYTimes(BasicNewsRecipe):
|
||||
('Education',u'education'),
|
||||
('Multimedia',u'multimedia'),
|
||||
(u'Obituaries',u'obituaries'),
|
||||
(u'Sunday Magazine',u'magazine'),
|
||||
(u'Week in Review',u'weekinreview')]
|
||||
(u'Sunday Magazine',u'magazine')
|
||||
]
|
||||
|
||||
tech_feeds = [
|
||||
(u'Tech - Pogues Posts', u'http://pogue.blogs.nytimes.com/feed/'),
|
||||
(u'Tech - Bits', u'http://bits.blogs.nytimes.com/feed/'),
|
||||
(u'Tech - Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
|
||||
(u'Tech - Open', u'http://open.blogs.nytimes.com/feed/')
|
||||
]
|
||||
|
||||
|
||||
if headlinesOnly:
|
||||
title='New York Times Headlines'
|
||||
description = 'Headlines from the New York Times'
|
||||
needs_subscription = True
|
||||
needs_subscription = False
|
||||
elif webEdition:
|
||||
title='New York Times (Web)'
|
||||
description = 'New York Times on the Web'
|
||||
needs_subscription = True
|
||||
needs_subscription = False
|
||||
elif replaceKindleVersion:
|
||||
title='The New York Times'
|
||||
description = 'Today\'s New York Times'
|
||||
needs_subscription = True
|
||||
needs_subscription = False
|
||||
else:
|
||||
title='New York Times'
|
||||
description = 'Today\'s New York Times. Needs subscription from http://www.nytimes.com'
|
||||
needs_subscription = True
|
||||
description = 'Today\'s New York Times'
|
||||
needs_subscription = False
|
||||
|
||||
|
||||
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
|
||||
|
||||
def decode_us_date(self,datestr):
|
||||
udate = datestr.strip().lower().split()
|
||||
def decode_url_date(self,url):
|
||||
urlitems = url.split('/')
|
||||
try:
|
||||
m = self.month_list.index(udate[0])+1
|
||||
d = date(int(urlitems[3]),int(urlitems[4]),int(urlitems[5]))
|
||||
except:
|
||||
return date.today()
|
||||
d = int(udate[1])
|
||||
y = int(udate[2])
|
||||
try:
|
||||
d = date(y,m,d)
|
||||
d = date(int(urlitems[4]),int(urlitems[5]),int(urlitems[6]))
|
||||
except:
|
||||
d = date.today
|
||||
return None
|
||||
return d
|
||||
|
||||
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||
if oldest_web_article is None:
|
||||
earliest_date = date.today()
|
||||
else:
|
||||
earliest_date = date.today() - timedelta(days=oldest_web_article)
|
||||
oldest_article = 365 # by default, a long time ago
|
||||
|
||||
__author__ = 'GRiker/Kovid Goyal/Nick Redding/Ben Collier'
|
||||
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
|
||||
language = 'en'
|
||||
requires_version = (0, 7, 5)
|
||||
|
||||
encoding = 'utf-8'
|
||||
|
||||
timefmt = ''
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
||||
|
||||
simultaneous_downloads = 1
|
||||
|
||||
cover_margins = (18,18,'grey99')
|
||||
|
||||
remove_tags_before = dict(id='article')
|
||||
remove_tags_after = dict(id='article')
|
||||
remove_tags = [dict(attrs={'class':[
|
||||
remove_tags = [
|
||||
dict(attrs={'class':[
|
||||
'articleFooter',
|
||||
'articleTools',
|
||||
'columnGroup doubleRule',
|
||||
'columnGroup singleRule',
|
||||
'columnGroup last',
|
||||
'columnGroup last',
|
||||
@ -151,7 +170,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
'dottedLine',
|
||||
'entry-meta',
|
||||
'entry-response module',
|
||||
#'icon enlargeThis', #removed to provide option for high res images
|
||||
'leftNavTabs',
|
||||
'metaFootnote',
|
||||
'module box nav',
|
||||
@ -175,12 +193,9 @@ class NYTimes(BasicNewsRecipe):
|
||||
'column four',#added for other blog downloads
|
||||
'column four last',#added for other blog downloads
|
||||
'column last', #added for other blog downloads
|
||||
'timestamp published', #added for other blog downloads
|
||||
'entry entry-related',
|
||||
'subNavigation tabContent active', #caucus blog navigation
|
||||
'columnGroup doubleRule',
|
||||
'mediaOverlay slideshow',
|
||||
'headlinesOnly multiline flush',
|
||||
'wideThumb',
|
||||
'video', #added 02-11-2011
|
||||
'videoHeader',#added 02-11-2011
|
||||
@ -189,7 +204,18 @@ class NYTimes(BasicNewsRecipe):
|
||||
re.compile('^subNavigation'),
|
||||
re.compile('^leaderboard'),
|
||||
re.compile('^module'),
|
||||
re.compile('commentCount')
|
||||
]}),
|
||||
dict(name='div', attrs={'class':re.compile('toolsList')}), # bits
|
||||
dict(name='div', attrs={'class':re.compile('postNavigation')}), # bits
|
||||
dict(name='div', attrs={'class':'tweet'}),
|
||||
dict(name='span', attrs={'class':'commentCount meta'}),
|
||||
dict(name='div', attrs={'id':'header'}),
|
||||
dict(name='div', attrs={'id':re.compile('commentsContainer')}), # bits, pogue, gadgetwise, open
|
||||
dict(name='ul', attrs={'class':re.compile('entry-tools')}), # pogue, gadgetwise
|
||||
dict(name='div', attrs={'class':re.compile('nocontent')}), # pogue, gadgetwise
|
||||
dict(name='div', attrs={'id':re.compile('respond')}), # open
|
||||
dict(name='div', attrs={'class':re.compile('entry-tags')}), # pogue
|
||||
dict(id=[
|
||||
'adxLeaderboard',
|
||||
'adxSponLink',
|
||||
@ -227,17 +253,21 @@ class NYTimes(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
.articleHeadline { text-align: left; margin-top:0.5em; margin-bottom:0.25em; }
|
||||
.credit { text-align: right; font-size: small; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.byline { text-align: left; font-size: small; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.dateline { text-align: left; font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.kicker { font-size: small; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.timestamp { text-align: left; font-size: small; }
|
||||
.caption { font-size: small; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.credit { font-weight: normal; text-align: right; font-size: 50%; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.byline { text-align: left; font-size: 50%; line-height:1em; margin-top:10px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.dateline { text-align: left; font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.kicker { font-size: 50%; line-height:1em;margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
.timestamp { font-weight: normal; text-align: left; font-size: 50%; }
|
||||
.caption { font-size: 50%; font-style:italic; line-height:1em; margin-top:5px; margin-left:0; margin-right:0; margin-bottom: 0; }
|
||||
a:link {text-decoration: none; }
|
||||
.date{font-size: 50%; }
|
||||
.update{font-size: 50%; }
|
||||
.articleBody { }
|
||||
.authorId {text-align: left; }
|
||||
.authorId {text-align: left; font-size: 50%; }
|
||||
.image {text-align: center;}
|
||||
.source {text-align: left; }'''
|
||||
.aside {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;}
|
||||
.asidenote {color:blue;margin:0px 0px 0px 0px; padding: 0px 0px 0px 0px; font-size:100%;font-weight:bold;}
|
||||
.source {text-align: left; font-size: x-small; }'''
|
||||
|
||||
|
||||
articles = {}
|
||||
@ -276,7 +306,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
def exclude_url(self,url):
|
||||
if not url.startswith("http"):
|
||||
return True
|
||||
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url and 'blogs.nytimes.com' not in url: #added for DealBook
|
||||
if not url.endswith(".html") and 'dealbook.nytimes.com' not in url: #added for DealBook
|
||||
return True
|
||||
if 'nytimes.com' not in url:
|
||||
return True
|
||||
@ -319,88 +349,91 @@ class NYTimes(BasicNewsRecipe):
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.nytimes.com/auth/login')
|
||||
br.form = br.forms().next()
|
||||
br['userid'] = self.username
|
||||
br['password'] = self.password
|
||||
raw = br.submit().read()
|
||||
if 'Please try again' in raw:
|
||||
raise Exception('Your username and password are incorrect')
|
||||
return br
|
||||
|
||||
def skip_ad_pages(self, soup):
|
||||
# Skip ad pages served before actual article
|
||||
skip_tag = soup.find(True, {'name':'skip'})
|
||||
if skip_tag is not None:
|
||||
self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
|
||||
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
|
||||
url += '?pagewanted=all'
|
||||
self.log.warn("Skipping ad to article at '%s'" % url)
|
||||
return self.index_to_soup(url, raw=True)
|
||||
## This doesn't work (and probably never did). It either gets another serve of the advertisement,
|
||||
## or if it gets the article then get_soup (from which it is invoked) traps trying to do xml decoding.
|
||||
##
|
||||
## def skip_ad_pages(self, soup):
|
||||
## # Skip ad pages served before actual article
|
||||
## skip_tag = soup.find(True, {'name':'skip'})
|
||||
## if skip_tag is not None:
|
||||
## self.log.warn("Found forwarding link: %s" % skip_tag.parent['href'])
|
||||
## url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
|
||||
## url += '?pagewanted=all'
|
||||
## self.log.warn("Skipping ad to article at '%s'" % url)
|
||||
## return self.index_to_soup(url, raw=True)
|
||||
|
||||
|
||||
cover_tag = 'NY_NYT'
|
||||
def get_cover_url(self):
|
||||
cover = None
|
||||
st = time.localtime()
|
||||
year = str(st.tm_year)
|
||||
month = "%.2d" % st.tm_mon
|
||||
day = "%.2d" % st.tm_mday
|
||||
cover = 'http://graphics8.nytimes.com/images/' + year + '/' + month +'/' + day +'/nytfrontpage/scan.jpg'
|
||||
from datetime import timedelta, date
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str(date.today().day)+'/lg/'+self.cover_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
daysback=1
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
while daysback<7:
|
||||
cover = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg'+str((date.today() - timedelta(days=daysback)).day)+'/lg/'+self.cover_tag+'.jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
daysback = daysback+1
|
||||
continue
|
||||
break
|
||||
if daysback==7:
|
||||
self.log("\nCover unavailable")
|
||||
cover = None
|
||||
return cover
|
||||
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
||||
|
||||
def short_title(self):
    """Return the recipe's short title, which is simply its full ``title``."""
    return self.title
|
||||
|
||||
def index_to_soup(self, url_or_raw, raw=False):
|
||||
'''
|
||||
OVERRIDE of class method
|
||||
deals with various page encodings between index and articles
|
||||
'''
|
||||
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||
|
||||
def article_to_soup(self, url_or_raw, raw=False):
|
||||
from contextlib import closing
|
||||
import copy
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
if re.match(r'\w+://', url_or_raw):
|
||||
br = self.clone_browser(self.browser)
|
||||
f = br.open_novisit(url_or_raw)
|
||||
open_func = getattr(br, 'open_novisit', br.open)
|
||||
with closing(open_func(url_or_raw)) as f:
|
||||
_raw = f.read()
|
||||
f.close()
|
||||
if not _raw:
|
||||
raise RuntimeError('Could not fetch index from %s'%url_or_raw)
|
||||
else:
|
||||
_raw = url_or_raw
|
||||
if raw:
|
||||
return _raw
|
||||
|
||||
if not isinstance(_raw, unicode) and self.encoding:
|
||||
_raw = _raw.decode(docEncoding, 'replace')
|
||||
massage = list(BeautifulSoup.MARKUP_MASSAGE)
|
||||
massage.append((re.compile(r'&(\S+?);'), lambda match: entity_to_unicode(match, encoding=self.encoding)))
|
||||
return BeautifulSoup(_raw, markupMassage=massage)
|
||||
if callable(self.encoding):
|
||||
_raw = self.encoding(_raw)
|
||||
else:
|
||||
_raw = _raw.decode(self.encoding, 'replace')
|
||||
|
||||
# Entry point
|
||||
soup = get_the_soup( self.encoding, url_or_raw )
|
||||
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
||||
if docEncoding == '' :
|
||||
docEncoding = self.encoding
|
||||
nmassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
|
||||
nmassage.extend(self.preprocess_regexps)
|
||||
nmassage += [(re.compile(r'<!DOCTYPE .+?>', re.DOTALL), lambda m: '')]
|
||||
# Some websites have buggy doctype declarations that mess up beautifulsoup
|
||||
# Remove comments as they can leave detritus when extracting tags leaves
|
||||
# multiple nested comments
|
||||
nmassage.append((re.compile(r'<!--.*?-->', re.DOTALL), lambda m: ''))
|
||||
usrc = xml_to_unicode(_raw, self.verbose, strip_encoding_pats=True)[0]
|
||||
usrc = self.preprocess_raw_html(usrc, url_or_raw)
|
||||
return BeautifulSoup(usrc, markupMassage=nmassage)
|
||||
|
||||
if self.verbose > 2:
|
||||
self.log( " document encoding: '%s'" % docEncoding)
|
||||
if docEncoding != self.encoding :
|
||||
soup = get_the_soup(docEncoding, url_or_raw)
|
||||
|
||||
return soup
|
||||
|
||||
def massageNCXText(self, description):
|
||||
# Kindle TOC descriptions won't render certain characters
|
||||
if description:
|
||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||
# Replace '&' with '&'
|
||||
massaged = re.sub("&","&", massaged)
|
||||
massaged = re.sub("&","&", massaged)
|
||||
massaged = re.sub("&","&", massaged)
|
||||
return self.fixChars(massaged)
|
||||
else:
|
||||
return description
|
||||
@ -422,6 +455,16 @@ class NYTimes(BasicNewsRecipe):
|
||||
if self.filterDuplicates:
|
||||
if url in self.url_list:
|
||||
return
|
||||
if self.webEdition:
|
||||
date_tag = self.decode_url_date(url)
|
||||
if date_tag is not None:
|
||||
if self.oldest_web_article is not None:
|
||||
if date_tag < self.earliest_date:
|
||||
self.log("Skipping article %s" % url)
|
||||
return
|
||||
else:
|
||||
self.log("Skipping article %s" % url)
|
||||
return
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
@ -446,6 +489,31 @@ class NYTimes(BasicNewsRecipe):
|
||||
description=description, author=author,
|
||||
content=''))
|
||||
|
||||
def get_tech_feeds(self, ans):
    """Append the technology-blog sections to ``ans`` and return it.

    ``ans`` is a list of ``(section_title, articles)`` tuples and is extended
    in place. When ``self.getTechBlogs`` is false it is returned untouched.

    Each feed returned by ``self.parse_feeds()`` becomes one
    ``(feed.title, [article_dict, ...])`` entry, where every article dict has
    the keys ``title``, ``url``, ``date``, ``description``, ``author`` and
    ``content``.
    """
    if not self.getTechBlogs:
        return ans

    # Swap in the tech-blog limits and feed list only for the duration of the
    # parse_feeds() call.
    saved_oldest_article = self.oldest_article
    saved_max_articles = self.max_articles_per_feed
    self.oldest_article = self.tech_oldest_article
    self.max_articles_per_feed = self.tech_max_articles_per_feed
    self.feeds = self.tech_feeds
    try:
        tech = self.parse_feeds()
    finally:
        # Restore even if feed parsing raises, so a failed tech download
        # cannot leave the recipe running with the tech-blog settings.
        self.oldest_article = saved_oldest_article
        self.max_articles_per_feed = saved_max_articles
        self.feeds = None

    key_list = []  # feed titles, in the order parse_feeds() returned them
    tech_articles = {}
    for f in tech:
        key_list.append(f.title)
        tech_articles[f.title] = [
            dict(title=a.title, url=a.url, date=a.date,
                 description=a.summary, author=a.author,
                 content=a.content)
            for a in f.articles
        ]

    # `in` instead of dict.has_key(): same result, and not Python-2-only.
    ans.extend((k, tech_articles[k]) for k in key_list if k in tech_articles)
    return ans
|
||||
|
||||
def parse_web_edition(self):
|
||||
|
||||
@ -457,31 +525,41 @@ class NYTimes(BasicNewsRecipe):
|
||||
if sec_title in self.excludeSections:
|
||||
print "SECTION EXCLUDED: ",sec_title
|
||||
continue
|
||||
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
|
||||
try:
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
|
||||
except:
|
||||
continue
|
||||
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
|
||||
|
||||
self.key = sec_title
|
||||
# Find each article
|
||||
for div in soup.findAll(True,
|
||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
if div['class'] in ['story', 'story headline'] :
|
||||
attrs={'class':['section-headline', 'ledeStory', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
if div['class'] in ['story', 'story headline', 'storyHeader'] :
|
||||
self.handle_article(div)
|
||||
elif div['class'] == 'ledeStory':
|
||||
divsub = div.find('div','storyHeader')
|
||||
if divsub is not None:
|
||||
self.handle_article(divsub)
|
||||
ulrefer = div.find('ul','refer')
|
||||
if ulrefer is not None:
|
||||
for lidiv in ulrefer.findAll('li'):
|
||||
self.handle_article(lidiv)
|
||||
elif div['class'] == 'headlinesOnly multiline flush':
|
||||
for lidiv in div.findAll('li'):
|
||||
self.handle_article(lidiv)
|
||||
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
return self.filter_ans(self.get_tech_feeds(self.ans))
|
||||
|
||||
|
||||
def parse_todays_index(self):
|
||||
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
||||
|
||||
skipping = False
|
||||
# Find each article
|
||||
for div in soup.findAll(True,
|
||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
|
||||
if div['class'] in ['section-headline','sectionHeader']:
|
||||
self.key = string.capwords(self.feed_title(div))
|
||||
self.key = self.key.replace('Op-ed','Op-Ed')
|
||||
@ -505,7 +583,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.handle_article(lidiv)
|
||||
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
return self.filter_ans(self.get_tech_feeds(self.ans))
|
||||
|
||||
def parse_headline_index(self):
|
||||
|
||||
@ -553,7 +631,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
for h3_item in search_div.findAll('h3'):
|
||||
byline = h3_item.h6
|
||||
if byline is not None:
|
||||
author = self.tag_to_string(byline,usa_alt=False)
|
||||
author = self.tag_to_string(byline,use_alt=False)
|
||||
else:
|
||||
author = ''
|
||||
a = h3_item.find('a', href=True)
|
||||
@ -579,7 +657,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
return self.filter_ans(self.get_tech_feeds(self.ans))
|
||||
|
||||
def parse_index(self):
|
||||
if self.headlinesOnly:
|
||||
@ -589,40 +667,198 @@ class NYTimes(BasicNewsRecipe):
|
||||
else:
|
||||
return self.parse_todays_index()
|
||||
|
||||
def strip_anchors(self,soup):
|
||||
def strip_anchors(self,soup,kill_all=False):
|
||||
paras = soup.findAll(True)
|
||||
for para in paras:
|
||||
aTags = para.findAll('a')
|
||||
for a in aTags:
|
||||
if a.img is None:
|
||||
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||
if kill_all or (self.recursions==0):
|
||||
a.replaceWith(self.tag_to_string(a,False))
|
||||
else:
|
||||
if a.has_key('href'):
|
||||
if a['href'].startswith('http://www.nytimes'):
|
||||
if not a['href'].endswith('pagewanted=all'):
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if self.exclude_url(url):
|
||||
a.replaceWith(self.tag_to_string(a,False))
|
||||
else:
|
||||
a['href'] = url+'?pagewanted=all'
|
||||
elif not (a['href'].startswith('http://pogue') or \
|
||||
a['href'].startswith('http://bits') or \
|
||||
a['href'].startswith('http://travel') or \
|
||||
a['href'].startswith('http://business') or \
|
||||
a['href'].startswith('http://tech') or \
|
||||
a['href'].startswith('http://health') or \
|
||||
a['href'].startswith('http://dealbook') or \
|
||||
a['href'].startswith('http://open')):
|
||||
a.replaceWith(self.tag_to_string(a,False))
|
||||
return soup
|
||||
|
||||
def handle_tags(self, soup):
    """Apply the recipe's tag filters to ``soup`` in place and return it.

    The filters are applied in this order: ``keep_only_tags``,
    ``remove_tags_after``, ``remove_tags_before``, ``remove_tags``.
    Returns ``None`` (after printing an error) when ``soup`` is ``None``.
    """
    # Guard first: everything below dereferences soup.
    if soup is None:
        print("ERROR: handle_tags received NoneType")
        return None

    try:
        print("HANDLE TAGS: TITLE = "+self.tag_to_string(soup.title))
    except Exception:
        # Diagnostic output only; a missing or odd title must not abort filtering.
        print("HANDLE TAGS: NO TITLE")

    if self.keep_only_tags:
        # Build a fresh <body> holding only the matching tags, then swap it in.
        body = Tag(soup, 'body')
        try:
            if isinstance(self.keep_only_tags, dict):
                self.keep_only_tags = [self.keep_only_tags]
            for spec in self.keep_only_tags:
                for tag in soup.find('body').findAll(**spec):
                    body.insert(len(body.contents), tag)
            soup.find('body').replaceWith(body)
        except AttributeError:  # soup has no body element
            pass

    def remove_beyond(tag, direction):
        """Extract every sibling of ``tag`` on the ``direction`` side
        ('nextSibling' or 'previousSibling'), then do the same for each
        ancestor until <body> (or the top of the tree) is reached."""
        while tag is not None and getattr(tag, 'name', None) != 'body':
            sibling = getattr(tag, direction)
            while sibling is not None:
                # extract() relinks tag to the following sibling, so re-reading
                # the attribute walks the rest of the chain.
                sibling.extract()
                sibling = getattr(tag, direction)
            tag = tag.parent

    if self.remove_tags_after is not None:
        # Accept either a single spec dict or a list of them.
        if isinstance(self.remove_tags_after, dict):
            after_specs = [self.remove_tags_after]
        else:
            after_specs = self.remove_tags_after
        for spec in after_specs:
            remove_beyond(soup.find(**spec), 'nextSibling')

    if self.remove_tags_before is not None:
        remove_beyond(soup.find(**self.remove_tags_before), 'previousSibling')

    for kwds in self.remove_tags:
        for tag in soup.findAll(**kwds):
            tag.extract()

    return soup
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
if self.webEdition & (self.oldest_article>0):
|
||||
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
|
||||
if date_tag:
|
||||
date_str = self.tag_to_string(date_tag,use_alt=False)
|
||||
date_str = date_str.replace('Published:','')
|
||||
date_items = date_str.split(',')
|
||||
try:
|
||||
datestring = date_items[0]+' '+date_items[1]
|
||||
article_date = self.decode_us_date(datestring)
|
||||
except:
|
||||
article_date = date.today()
|
||||
if article_date < self.earliest_date:
|
||||
self.log("Skipping article dated %s" % date_str)
|
||||
return None
|
||||
print("PREPROCESS TITLE="+self.tag_to_string(soup.title))
|
||||
skip_tag = soup.find(True, {'name':'skip'})
|
||||
if skip_tag is not None:
|
||||
url = 'http://www.nytimes.com' + re.sub(r'\?.*', '', skip_tag.parent['href'])
|
||||
url += '?pagewanted=all'
|
||||
self.log.warn("Skipping ad to article at '%s'" % url)
|
||||
sleep(5)
|
||||
soup = self.handle_tags(self.article_to_soup(url))
|
||||
|
||||
#all articles are from today, no need to print the date on every page
|
||||
try:
|
||||
if not self.webEdition:
|
||||
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
|
||||
if date_tag:
|
||||
date_tag.extract()
|
||||
except:
|
||||
self.log("Error removing the published date")
|
||||
# check if the article is from one of the tech blogs
|
||||
blog=soup.find('div',attrs={'id':['pogue','bits','gadgetwise','open']})
|
||||
|
||||
if blog is not None:
|
||||
old_body = soup.find('body')
|
||||
new_body=Tag(soup,'body')
|
||||
new_body.append(soup.find('div',attrs={'id':'content'}))
|
||||
new_body.find('div',attrs={'id':'content'})['id']='blogcontent' # identify for postprocess_html
|
||||
old_body.replaceWith(new_body)
|
||||
for divr in soup.findAll('div',attrs={'class':re.compile('w190 right')}):
|
||||
if divr.find(text=re.compile('Sign up')):
|
||||
divr.extract()
|
||||
divr = soup.find('div',attrs={'id':re.compile('related-content')})
|
||||
if divr is not None:
|
||||
# handle related articles
|
||||
rlist = []
|
||||
ul = divr.find('ul')
|
||||
if ul is not None:
|
||||
for li in ul.findAll('li'):
|
||||
atag = li.find('a')
|
||||
if atag is not None:
|
||||
if atag['href'].startswith('http://pogue') or atag['href'].startswith('http://bits') or \
|
||||
atag['href'].startswith('http://open'):
|
||||
atag.find(text=True).replaceWith(self.massageNCXText(self.tag_to_string(atag,False)))
|
||||
rlist.append(atag)
|
||||
divr.extract()
|
||||
if rlist != []:
|
||||
asidediv = Tag(soup,'div',[('class','aside')])
|
||||
if soup.find('hr') is None:
|
||||
asidediv.append(Tag(soup,'hr'))
|
||||
h4 = Tag(soup,'h4',[('class','asidenote')])
|
||||
h4.insert(0,"Related Posts")
|
||||
asidediv.append(h4)
|
||||
ul = Tag(soup,'ul')
|
||||
for r in rlist:
|
||||
li = Tag(soup,'li',[('class','aside')])
|
||||
r['class'] = 'aside'
|
||||
li.append(r)
|
||||
ul.append(li)
|
||||
asidediv.append(ul)
|
||||
asidediv.append(Tag(soup,'hr'))
|
||||
smain = soup.find('body')
|
||||
smain.append(asidediv)
|
||||
for atag in soup.findAll('a'):
|
||||
img = atag.find('img')
|
||||
if img is not None:
|
||||
atag.replaceWith(img)
|
||||
elif not atag.has_key('href'):
|
||||
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
|
||||
elif not (atag['href'].startswith('http://www.nytimes') or atag['href'].startswith('http://pogue') or \
|
||||
atag['href'].startswith('http://bits') or atag['href'].startswith('http://open')):
|
||||
atag.replaceWith(atag.renderContents().decode('cp1252','replace'))
|
||||
hdr = soup.find('address')
|
||||
if hdr is not None:
|
||||
hdr.name='span'
|
||||
for span_credit in soup.findAll('span','credit'):
|
||||
sp = Tag(soup,'span')
|
||||
span_credit.replaceWith(sp)
|
||||
sp.append(Tag(soup,'br'))
|
||||
sp.append(span_credit)
|
||||
sp.append(Tag(soup,'br'))
|
||||
|
||||
else: # nytimes article
|
||||
|
||||
related = [] # these will be the related articles
|
||||
first_outer = None # first related outer tag
|
||||
first_related = None # first related tag
|
||||
for outerdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
|
||||
for rdiv in soup.findAll('div','columnGroup doubleRule'):
|
||||
if rdiv.find('h3') is not None:
|
||||
if self.tag_to_string(rdiv.h3,False).startswith('Related'):
|
||||
rdiv.h3.find(text=True).replaceWith("Related articles")
|
||||
rdiv.h3['class'] = 'asidenote'
|
||||
for litag in rdiv.findAll('li'):
|
||||
if litag.find('a') is not None:
|
||||
if litag.find('a')['href'].startswith('http://www.nytimes.com'):
|
||||
url = re.sub(r'\?.*', '', litag.find('a')['href'])
|
||||
litag.find('a')['href'] = url+'?pagewanted=all'
|
||||
litag.extract()
|
||||
related.append(litag)
|
||||
if first_related is None:
|
||||
first_related = rdiv
|
||||
first_outer = outerdiv
|
||||
else:
|
||||
litag.extract()
|
||||
if related != []:
|
||||
for r in related:
|
||||
if r.h6: # don't want the anchor inside a h6 tag
|
||||
r.h6.replaceWith(r.h6.a)
|
||||
first_related.ul.append(r)
|
||||
first_related.insert(0,Tag(soup,'hr'))
|
||||
first_related.append(Tag(soup,'hr'))
|
||||
first_related['class'] = 'aside'
|
||||
first_outer.replaceWith(first_related) # replace the outer tag with the related tag
|
||||
|
||||
for rdiv in soup.findAll(attrs={'class': re.compile('articleInline runaroundLeft')}):
|
||||
rdiv.extract()
|
||||
|
||||
kicker_tag = soup.find(attrs={'class':'kicker'})
|
||||
if kicker_tag: # remove Op_Ed author head shots
|
||||
tagline = self.tag_to_string(kicker_tag)
|
||||
if tagline=='Op-Ed Columnist':
|
||||
img_div = soup.find('div','inlineImage module')
|
||||
if img_div:
|
||||
img_div.extract()
|
||||
|
||||
if self.useHighResImages:
|
||||
try:
|
||||
@ -667,26 +903,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
except Exception:
|
||||
self.log("Error pulling high resolution images")
|
||||
|
||||
try:
|
||||
#remove "Related content" bar
|
||||
runAroundsFound = soup.findAll('div',{'class':['articleInline runaroundLeft','articleInline doubleRule runaroundLeft','articleInline runaroundLeft firstArticleInline','articleInline runaroundLeft ','articleInline runaroundLeft lastArticleInline']})
|
||||
if runAroundsFound:
|
||||
for runAround in runAroundsFound:
|
||||
#find all section headers
|
||||
hlines = runAround.findAll(True ,{'class':['sectionHeader','sectionHeader flushBottom']})
|
||||
if hlines:
|
||||
for hline in hlines:
|
||||
hline.extract()
|
||||
|
||||
#find all section headers
|
||||
hlines = runAround.findAll('h6')
|
||||
if hlines:
|
||||
for hline in hlines:
|
||||
hline.extract()
|
||||
except:
|
||||
self.log("Error removing related content bar")
|
||||
|
||||
|
||||
try:
|
||||
#in case pulling images failed, delete the enlarge this text
|
||||
enlargeThisList = soup.findAll('div',{'class':'icon enlargeThis'})
|
||||
@ -696,9 +912,24 @@ class NYTimes(BasicNewsRecipe):
|
||||
except:
|
||||
self.log("Error removing Enlarge this text")
|
||||
|
||||
return self.strip_anchors(soup)
|
||||
|
||||
def postprocess_html(self,soup, True):
|
||||
return self.strip_anchors(soup,False)
|
||||
|
||||
def postprocess_html(self,soup,first_fetch):
|
||||
if not first_fetch: # remove Related links
|
||||
for aside in soup.findAll('div','aside'):
|
||||
aside.extract()
|
||||
soup = self.strip_anchors(soup,True)
|
||||
|
||||
if soup.find('div',attrs={'id':'blogcontent'}) is None:
|
||||
if first_fetch:
|
||||
aside = soup.find('div','aside')
|
||||
if aside is not None: # move the related list to the end of the article
|
||||
art = soup.find('div',attrs={'id':'article'})
|
||||
if art is None:
|
||||
art = soup.find('div',attrs={'class':'article'})
|
||||
if art is not None:
|
||||
art.append(aside)
|
||||
try:
|
||||
if self.one_picture_per_article:
|
||||
# Remove all images after first
|
||||
@ -855,23 +1086,22 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.log("ERROR: Problem in Add class=authorId to <div> so we can format with CSS")
|
||||
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first and hasattr(self, 'add_toc_thumbnail'):
|
||||
if not first:
|
||||
return
|
||||
idxdiv = soup.find('div',attrs={'class':'articleSpanImage'})
|
||||
if idxdiv is not None:
|
||||
if idxdiv.img:
|
||||
self.add_toc_thumbnail(article, idxdiv.img['src'])
|
||||
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',idxdiv.img['src']))
|
||||
else:
|
||||
img = soup.find('img')
|
||||
img = soup.find('body').find('img')
|
||||
if img is not None:
|
||||
self.add_toc_thumbnail(article, img['src'])
|
||||
|
||||
self.add_toc_thumbnail(article, re.sub(r'links\\link\d+\\','',img['src']))
|
||||
shortparagraph = ""
|
||||
try:
|
||||
if len(article.text_summary.strip()) == 0:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'articleBody'})
|
||||
if not articlebodies: #added to account for blog formats
|
||||
articlebodies = soup.findAll('div', attrs={'class':'entry-content'}) #added to account for blog formats
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
if articlebody:
|
||||
@ -880,15 +1110,23 @@ class NYTimes(BasicNewsRecipe):
|
||||
refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
|
||||
#account for blank paragraphs and short paragraphs by appending them to longer ones
|
||||
if len(refparagraph) > 0:
|
||||
if len(refparagraph) > 140: #approximately two lines of text
|
||||
article.summary = article.text_summary = shortparagraph + refparagraph
|
||||
if len(refparagraph) > 70: #approximately one line of text
|
||||
newpara = shortparagraph + refparagraph
|
||||
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
|
||||
if newparaEm == '':
|
||||
newparaDateline,newparaEm,newparaDesc = newpara.partition('—')
|
||||
if newparaEm == '':
|
||||
newparaDesc = newparaDateline
|
||||
article.summary = article.text_summary = newparaDesc.strip()
|
||||
return
|
||||
else:
|
||||
shortparagraph = refparagraph + " "
|
||||
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
|
||||
shortparagraph = shortparagraph + "- "
|
||||
|
||||
else:
|
||||
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
|
||||
except:
|
||||
self.log("Error creating article descriptions")
|
||||
return
|
||||
|
||||
|
||||
|
@ -8,19 +8,19 @@ Fetch sueddeutsche.de
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Sueddeutsche(BasicNewsRecipe):
|
||||
|
||||
title = u'Süddeutsche.de' # 2012-01-26 AGe Correct Title
|
||||
description = 'News from Germany, Access to online content' # 2012-01-26 AGe
|
||||
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-01-26
|
||||
publisher = u'Süddeutsche Zeitung' # 2012-01-26 AGe add
|
||||
category = 'news, politics, Germany' # 2012-01-26 AGe add
|
||||
timefmt = ' [%a, %d %b %Y]' # 2012-01-26 AGe add %a
|
||||
title = u'Süddeutsche.de'
|
||||
description = 'News from Germany, Access to online content'
|
||||
__author__ = 'Oliver Niesner and Armin Geller' #Update AGe 2012-12-05
|
||||
publisher = u'Süddeutsche Zeitung'
|
||||
category = 'news, politics, Germany'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
language = 'de'
|
||||
encoding = 'utf-8'
|
||||
publication_type = 'newspaper' # 2012-01-26 add
|
||||
publication_type = 'newspaper'
|
||||
cover_source = 'http://www.sueddeutsche.de/verlag' # 2012-01-26 AGe add from Darko Miletic paid content source
|
||||
masthead_url = 'http://www.sueddeutsche.de/static_assets/build/img/sdesiteheader/logo_homepage.441d531c.png' # 2012-01-26 AGe add
|
||||
masthead_url = 'http://www.sueddeutsche.de/static_assets/img/sdesiteheader/logo_standard.a152b0df.png' # 2012-12-05 AGe add
|
||||
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
@ -40,9 +40,9 @@ class Sueddeutsche(BasicNewsRecipe):
|
||||
(u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
|
||||
(u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
|
||||
(u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
|
||||
(u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'), #2012-01-26 AGe New
|
||||
(u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'), #2012-01-26 AGe New
|
||||
(u'Stil', u'http://rss.sueddeutsche.de/rss/stil'), #2012-01-26 AGe New
|
||||
(u'Bildung', u'http://rss.sueddeutsche.de/rss/bildung'),
|
||||
(u'Gesundheit', u'http://rss.sueddeutsche.de/rss/gesundheit'),
|
||||
(u'Stil', u'http://rss.sueddeutsche.de/rss/stil'),
|
||||
(u'München & Region', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMünchen&Region%24?output=rss'),
|
||||
(u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
|
||||
(u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
|
||||
|
@ -2,8 +2,8 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '4 February 2011, desUBIKado'
|
||||
__author__ = 'desUBIKado'
|
||||
__version__ = 'v0.08'
|
||||
__date__ = '30, June 2012'
|
||||
__version__ = 'v0.09'
|
||||
__date__ = '02, December 2012'
|
||||
'''
|
||||
http://www.weblogssl.com/
|
||||
'''
|
||||
@ -37,6 +37,7 @@ class weblogssl(BasicNewsRecipe):
|
||||
,(u'Xataka Mexico', u'http://feeds.weblogssl.com/xatakamx')
|
||||
,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
|
||||
,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
|
||||
,(u'Xataka Windows', u'http://feeds.weblogssl.com/xatakawindows')
|
||||
,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
|
||||
,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
|
||||
,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
|
||||
@ -80,19 +81,31 @@ class weblogssl(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'infoblock'}),
|
||||
dict(name='div', attrs={'class':'post'}),
|
||||
dict(name='div', attrs={'id':'blog-comments'})
|
||||
dict(name='div', attrs={'id':'blog-comments'}),
|
||||
dict(name='div', attrs={'class':'container'}) #m.xataka.com
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':'comment-nav'})]
|
||||
remove_tags = [dict(name='div', attrs={'id':'comment-nav'}),
|
||||
dict(name='menu', attrs={'class':'social-sharing'}), #m.xataka.com
|
||||
dict(name='section' , attrs={'class':'comments'}), #m.xataka.com
|
||||
dict(name='div' , attrs={'class':'article-comments'}), #m.xataka.com
|
||||
dict(name='nav' , attrs={'class':'article-taxonomy'}) #m.xataka.com
|
||||
]
|
||||
|
||||
remove_tags_after = dict(name='section' , attrs={'class':'comments'})
|
||||
|
||||
def print_version(self, url):
    """Return ``url`` with every ``http://www.`` rewritten to ``http://m.``."""
    desktop_prefix = 'http://www.'
    mobile_prefix = 'http://m.'
    return url.replace(desktop_prefix, mobile_prefix)
|
||||
|
||||
preprocess_regexps = [
|
||||
# Para poner una linea en blanco entre un comentario y el siguiente
|
||||
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c')
|
||||
(re.compile(r'<li id="c', re.DOTALL|re.IGNORECASE), lambda match: '<br><br><li id="c'),
|
||||
# Para ver las imágenes en las noticias de m.xataka.com
|
||||
(re.compile(r'<noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'</noscript>', re.DOTALL|re.IGNORECASE), lambda m: '')
|
||||
]
|
||||
|
||||
|
||||
# Para sustituir el video incrustado de YouTube por una imagen
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
@ -108,14 +121,16 @@ class weblogssl(BasicNewsRecipe):
|
||||
|
||||
# Para obtener la url original del articulo a partir de la de "feedsportal"
|
||||
# El siguiente código es gracias al usuario "bosplans" de www.mobileread.com
|
||||
# http://www.mobileread.com/forums/sho...d.php?t=130297
|
||||
# http://www.mobileread.com/forums/showthread.php?t=130297
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = article.get('link', None)
|
||||
if link is None:
|
||||
return article
|
||||
# if link.split('/')[-4]=="xataka2":
|
||||
# return article.get('feedburner_origlink', article.get('link', article.get('guid')))
|
||||
if link.split('/')[-4]=="xataka2":
|
||||
return article.get('feedburner_origlink', article.get('link', article.get('guid')))
|
||||
return article.get('guid', None)
|
||||
if link.split('/')[-1]=="story01.htm":
|
||||
link=link.split('/')[-2]
|
||||
a=['0B','0C','0D','0E','0F','0G','0N' ,'0L0S','0A']
|
||||
|
@ -9,15 +9,15 @@ class Zaman (BasicNewsRecipe):
|
||||
__author__ = u'thomass'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed =50
|
||||
# no_stylesheets = True
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
#use_embedded_content = False
|
||||
encoding = 'ISO 8859-9'
|
||||
publisher = 'Zaman'
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
publisher = 'Feza Gazetecilik'
|
||||
category = 'news, haberler,TR,gazete'
|
||||
language = 'tr'
|
||||
publication_type = 'newspaper '
|
||||
extra_css = '.buyukbaslik{font-weight: bold; font-size: 18px;color:#0000FF}'#body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
extra_css = 'h1{text-transform: capitalize; font-weight: bold; font-size: 22px;color:#0000FF} p{text-align:justify} ' #.introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
conversion_options = {
|
||||
'tags' : category
|
||||
,'language' : language
|
||||
@ -26,25 +26,26 @@ class Zaman (BasicNewsRecipe):
|
||||
}
|
||||
cover_img_url = 'https://fbcdn-profile-a.akamaihd.net/hprofile-ak-snc4/188140_81722291869_2111820_n.jpg'
|
||||
masthead_url = 'http://medya.zaman.com.tr/extentions/zaman.com.tr/img/section/logo-section.png'
|
||||
ignore_duplicate_articles = { 'title', 'url' }
|
||||
auto_cleanup = False
|
||||
remove_empty_feeds= True
|
||||
|
||||
|
||||
#keep_only_tags = [dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}) ]
|
||||
remove_tags = [ dict(name='img', attrs={'src':['http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif']})]#,dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']})
|
||||
#keep_only_tags = [dict(name='div', attrs={'id':[ 'contentposition19']})]#,dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'news-detail-content']}), dict(name='td', attrs={'class':['columnist-detail','columnist_head']}), ]
|
||||
remove_tags = [ dict(name='img', attrs={'src':['http://cmsmedya.zaman.com.tr/images/logo/logo.bmp']}),dict(name='hr', attrs={'class':['interactive-hr']})]# remove_tags = [ dict(name='div', attrs={'class':[ 'detayUyari']}),dict(name='div', attrs={'class':[ 'detayYorum']}),dict(name='div', attrs={'class':[ 'addthis_toolbox addthis_default_style ']}),dict(name='div', attrs={'id':[ 'tumYazi']})]#,dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='div', attrs={'id':[ 'xxx']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/zamantryeni/pics/zamanonline.gif']}),dict(name='div', attrs={'class':['radioEmbedBg','radyoProgramAdi']}),dict(name='a', attrs={'class':['webkit-html-attribute-value webkit-html-external-link']}),dict(name='table', attrs={'id':['yaziYorumTablosu']}),dict(name='img', attrs={'src':['http://medya.zaman.com.tr/pics/paylas.gif','http://medya.zaman.com.tr/extentions/zaman.com.tr/img/columnist/ma-16.png']}),dict(name='div', attrs={'id':[ 'news-detail-gallery']}),dict(name='div', attrs={'id':[ 'news-detail-title-bottom-part']}),dict(name='div', attrs={'id':[ 'news-detail-news-paging-main']})]#
|
||||
|
||||
|
||||
#remove_attributes = ['width','height']
|
||||
remove_empty_feeds= True
|
||||
|
||||
feeds = [
|
||||
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
|
||||
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
|
||||
#( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
|
||||
#( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
|
||||
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
|
||||
( u'Manşet', u'http://www.zaman.com.tr/manset.rss'),
|
||||
( u'Yazarlar', u'http://www.zaman.com.tr/yazarlar.rss'),
|
||||
( u'Politika', u'http://www.zaman.com.tr/politika.rss'),
|
||||
( u'Ekonomi', u'http://www.zaman.com.tr/ekonomi.rss'),
|
||||
( u'Dış Haberler', u'http://www.zaman.com.tr/dishaberler.rss'),
|
||||
( u'Son Dakika', u'http://www.zaman.com.tr/sondakika.rss'),
|
||||
( u'Gündem', u'http://www.zaman.com.tr/gundem.rss'),
|
||||
( u'Yorumlar', u'http://www.zaman.com.tr/yorumlar.rss'),
|
||||
( u'Röportaj', u'http://www.zaman.com.tr/roportaj.rss'),
|
||||
( u'Dizi Yazı', u'http://www.zaman.com.tr/dizi.rss'),
|
||||
@ -59,8 +60,9 @@ class Zaman (BasicNewsRecipe):
|
||||
( u'Cuma Eki', u'http://www.zaman.com.tr/cuma.rss'),
|
||||
( u'Cumaertesi Eki', u'http://www.zaman.com.tr/cumaertesi.rss'),
|
||||
( u'Pazar Eki', u'http://www.zaman.com.tr/pazar.rss'),
|
||||
( u'En çok Okunanlar', u'http://www.zaman.com.tr/max_all.rss'),
|
||||
( u'Anasayfa', u'http://www.zaman.com.tr/anasayfa.rss'),
|
||||
|
||||
]
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.zaman.com.tr/haber.do?haberno=', 'http://www.zaman.com.tr/yazdir.do?haberno=')
|
||||
|
||||
return url.replace('http://www.zaman.com.tr/newsDetail_getNewsById.action?newsId=', 'http://www.zaman.com.tr/newsDetail_openPrintPage.action?newsId=')
|
||||
|
@ -215,6 +215,8 @@ class Command(object):
|
||||
sys.stdout.flush()
|
||||
|
||||
def installer_name(ext, is64bit=False):
|
||||
if is64bit and ext == 'msi':
|
||||
return 'dist/%s-64bit-%s.msi'%(__appname__, __version__)
|
||||
if ext in ('exe', 'msi'):
|
||||
return 'dist/%s-%s.%s'%(__appname__, __version__, ext)
|
||||
if ext == 'dmg':
|
||||
|
@ -11,12 +11,11 @@ from distutils.spawn import find_executable
|
||||
|
||||
from PyQt4 import pyqtconfig
|
||||
|
||||
from setup import isosx, iswindows, islinux
|
||||
from setup import isosx, iswindows, islinux, is64bit
|
||||
|
||||
OSX_SDK = '/Developer/SDKs/MacOSX10.5.sdk'
|
||||
|
||||
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.5'
|
||||
is64bit = sys.maxsize > 2**32
|
||||
|
||||
NMAKE = RC = msvc = MT = win_inc = win_lib = win_ddk = win_ddk_lib_dirs = None
|
||||
if iswindows:
|
||||
|
@ -20,7 +20,7 @@ __all__ = [
|
||||
'upload_user_manual', 'upload_demo', 'reupload',
|
||||
'linux32', 'linux64', 'linux', 'linux_freeze',
|
||||
'osx32_freeze', 'osx', 'rsync', 'push',
|
||||
'win32_freeze', 'win32', 'win',
|
||||
'win32_freeze', 'win32', 'win64', 'win',
|
||||
'stage1', 'stage2', 'stage3', 'stage4', 'stage5', 'publish'
|
||||
]
|
||||
|
||||
@ -91,9 +91,10 @@ osx = OSX()
|
||||
from setup.installer.osx.app.main import OSX32_Freeze
|
||||
osx32_freeze = OSX32_Freeze()
|
||||
|
||||
from setup.installer.windows import Win, Win32
|
||||
from setup.installer.windows import Win, Win32, Win64
|
||||
win = Win()
|
||||
win32 = Win32()
|
||||
win64 = Win64()
|
||||
from setup.installer.windows.freeze import Win32Freeze
|
||||
win32_freeze = Win32Freeze()
|
||||
|
||||
|
@ -1,12 +1,12 @@
|
||||
/*
|
||||
* Memory DLL loading code
|
||||
* Version 0.0.2 with additions from Thomas Heller
|
||||
* Version 0.0.3
|
||||
*
|
||||
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de
|
||||
* Copyright (c) 2004-2012 by Joachim Bauch / mail@joachim-bauch.de
|
||||
* http://www.joachim-bauch.de
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* 2.0 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
@ -19,156 +19,48 @@
|
||||
*
|
||||
* The Initial Developer of the Original Code is Joachim Bauch.
|
||||
*
|
||||
* Portions created by Joachim Bauch are Copyright (C) 2004-2005
|
||||
* Portions created by Joachim Bauch are Copyright (C) 2004-2012
|
||||
* Joachim Bauch. All Rights Reserved.
|
||||
*
|
||||
* Portions Copyright (C) 2005 Thomas Heller.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __GNUC__
|
||||
// disable warnings about pointer <-> DWORD conversions
|
||||
#pragma warning( disable : 4311 4312 )
|
||||
#endif
|
||||
|
||||
#ifdef _WIN64
|
||||
#define POINTER_TYPE ULONGLONG
|
||||
#else
|
||||
#define POINTER_TYPE DWORD
|
||||
#endif
|
||||
|
||||
#include <Windows.h>
|
||||
#include <winnt.h>
|
||||
#if DEBUG_OUTPUT
|
||||
#ifdef DEBUG_OUTPUT
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifndef IMAGE_SIZEOF_BASE_RELOCATION
|
||||
// Vista SDKs no longer define IMAGE_SIZEOF_BASE_RELOCATION!?
|
||||
# define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION))
|
||||
#define IMAGE_SIZEOF_BASE_RELOCATION (sizeof(IMAGE_BASE_RELOCATION))
|
||||
#endif
|
||||
|
||||
#include "MemoryModule.h"
|
||||
|
||||
/*
|
||||
XXX We need to protect at least walking the 'loaded' linked list with a lock!
|
||||
*/
|
||||
|
||||
/******************************************************************/
|
||||
FINDPROC findproc;
|
||||
void *findproc_data = NULL;
|
||||
|
||||
struct NAME_TABLE {
|
||||
char *name;
|
||||
DWORD ordinal;
|
||||
};
|
||||
|
||||
typedef struct tagMEMORYMODULE {
|
||||
typedef struct {
|
||||
PIMAGE_NT_HEADERS headers;
|
||||
unsigned char *codeBase;
|
||||
HMODULE *modules;
|
||||
int numModules;
|
||||
int initialized;
|
||||
|
||||
struct NAME_TABLE *name_table;
|
||||
|
||||
char *name;
|
||||
int refcount;
|
||||
struct tagMEMORYMODULE *next, *prev;
|
||||
} MEMORYMODULE, *PMEMORYMODULE;
|
||||
|
||||
typedef BOOL (WINAPI *DllEntryProc)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved);
|
||||
|
||||
#define GET_HEADER_DICTIONARY(module, idx) &(module)->headers->OptionalHeader.DataDirectory[idx]
|
||||
|
||||
MEMORYMODULE *loaded; /* linked list of loaded memory modules */
|
||||
|
||||
/* private - insert a loaded library in a linked list */
|
||||
static void _Register(char *name, MEMORYMODULE *module)
|
||||
{
|
||||
module->next = loaded;
|
||||
if (loaded)
|
||||
loaded->prev = module;
|
||||
module->prev = NULL;
|
||||
loaded = module;
|
||||
}
|
||||
|
||||
/* private - remove a loaded library from a linked list */
|
||||
static void _Unregister(MEMORYMODULE *module)
|
||||
{
|
||||
free(module->name);
|
||||
if (module->prev)
|
||||
module->prev->next = module->next;
|
||||
if (module->next)
|
||||
module->next->prev = module->prev;
|
||||
if (module == loaded)
|
||||
loaded = module->next;
|
||||
}
|
||||
|
||||
/* public - replacement for GetModuleHandle() */
|
||||
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName)
|
||||
{
|
||||
MEMORYMODULE *p = loaded;
|
||||
while (p) {
|
||||
// If already loaded, only increment the reference count
|
||||
if (0 == stricmp(lpModuleName, p->name)) {
|
||||
return (HMODULE)p;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
return GetModuleHandle(lpModuleName);
|
||||
}
|
||||
|
||||
/* public - replacement for LoadLibrary, but searches FIRST for memory
|
||||
libraries, then for normal libraries. So, it will load libraries AS memory
|
||||
module if they are found by findproc().
|
||||
*/
|
||||
HMODULE MyLoadLibrary(char *lpFileName)
|
||||
{
|
||||
MEMORYMODULE *p = loaded;
|
||||
HMODULE hMod;
|
||||
|
||||
while (p) {
|
||||
// If already loaded, only increment the reference count
|
||||
if (0 == stricmp(lpFileName, p->name)) {
|
||||
p->refcount++;
|
||||
return (HMODULE)p;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
if (findproc && findproc_data) {
|
||||
void *pdata = findproc(lpFileName, findproc_data);
|
||||
if (pdata) {
|
||||
hMod = MemoryLoadLibrary(lpFileName, pdata);
|
||||
free(p);
|
||||
return hMod;
|
||||
}
|
||||
}
|
||||
hMod = LoadLibrary(lpFileName);
|
||||
return hMod;
|
||||
}
|
||||
|
||||
/* public - replacement for GetProcAddress() */
|
||||
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName)
|
||||
{
|
||||
MEMORYMODULE *p = loaded;
|
||||
while (p) {
|
||||
if ((HMODULE)p == hModule)
|
||||
return MemoryGetProcAddress(p, lpProcName);
|
||||
p = p->next;
|
||||
}
|
||||
return GetProcAddress(hModule, lpProcName);
|
||||
}
|
||||
|
||||
/* public - replacement for FreeLibrary() */
|
||||
BOOL MyFreeLibrary(HMODULE hModule)
|
||||
{
|
||||
MEMORYMODULE *p = loaded;
|
||||
while (p) {
|
||||
if ((HMODULE)p == hModule) {
|
||||
if (--p->refcount == 0) {
|
||||
_Unregister(p);
|
||||
MemoryFreeLibrary(p);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
return FreeLibrary(hModule);
|
||||
}
|
||||
|
||||
#if DEBUG_OUTPUT
|
||||
#ifdef DEBUG_OUTPUT
|
||||
static void
|
||||
OutputLastError(const char *msg)
|
||||
{
|
||||
@ -184,20 +76,6 @@ OutputLastError(const char *msg)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
static int dprintf(char *fmt, ...)
|
||||
{
|
||||
char Buffer[4096];
|
||||
va_list marker;
|
||||
int result;
|
||||
|
||||
va_start(marker, fmt);
|
||||
result = vsprintf(Buffer, fmt, marker);
|
||||
OutputDebugString(Buffer);
|
||||
return result;
|
||||
}
|
||||
*/
|
||||
|
||||
static void
|
||||
CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMODULE module)
|
||||
{
|
||||
@ -205,15 +83,12 @@ CopySections(const unsigned char *data, PIMAGE_NT_HEADERS old_headers, PMEMORYMO
|
||||
unsigned char *codeBase = module->codeBase;
|
||||
unsigned char *dest;
|
||||
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
|
||||
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++)
|
||||
{
|
||||
if (section->SizeOfRawData == 0)
|
||||
{
|
||||
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) {
|
||||
if (section->SizeOfRawData == 0) {
|
||||
// section doesn't contain data in the dll itself, but may define
|
||||
// uninitialized data
|
||||
size = old_headers->OptionalHeader.SectionAlignment;
|
||||
if (size > 0)
|
||||
{
|
||||
if (size > 0) {
|
||||
dest = (unsigned char *)VirtualAlloc(codeBase + section->VirtualAddress,
|
||||
size,
|
||||
MEM_COMMIT,
|
||||
@ -255,66 +130,72 @@ FinalizeSections(PMEMORYMODULE module)
|
||||
{
|
||||
int i;
|
||||
PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(module->headers);
|
||||
#ifdef _WIN64
|
||||
POINTER_TYPE imageOffset = (module->headers->OptionalHeader.ImageBase & 0xffffffff00000000);
|
||||
#else
|
||||
#define imageOffset 0
|
||||
#endif
|
||||
|
||||
// loop through all sections and change access flags
|
||||
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++)
|
||||
{
|
||||
for (i=0; i<module->headers->FileHeader.NumberOfSections; i++, section++) {
|
||||
DWORD protect, oldProtect, size;
|
||||
int executable = (section->Characteristics & IMAGE_SCN_MEM_EXECUTE) != 0;
|
||||
int readable = (section->Characteristics & IMAGE_SCN_MEM_READ) != 0;
|
||||
int writeable = (section->Characteristics & IMAGE_SCN_MEM_WRITE) != 0;
|
||||
|
||||
if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
|
||||
{
|
||||
if (section->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {
|
||||
// section is not needed any more and can safely be freed
|
||||
VirtualFree((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, MEM_DECOMMIT);
|
||||
VirtualFree((LPVOID)((POINTER_TYPE)section->Misc.PhysicalAddress | imageOffset), section->SizeOfRawData, MEM_DECOMMIT);
|
||||
continue;
|
||||
}
|
||||
|
||||
// determine protection flags based on characteristics
|
||||
protect = ProtectionFlags[executable][readable][writeable];
|
||||
if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED)
|
||||
if (section->Characteristics & IMAGE_SCN_MEM_NOT_CACHED) {
|
||||
protect |= PAGE_NOCACHE;
|
||||
}
|
||||
|
||||
// determine size of region
|
||||
size = section->SizeOfRawData;
|
||||
if (size == 0)
|
||||
{
|
||||
if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
|
||||
if (size == 0) {
|
||||
if (section->Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) {
|
||||
size = module->headers->OptionalHeader.SizeOfInitializedData;
|
||||
else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA)
|
||||
} else if (section->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA) {
|
||||
size = module->headers->OptionalHeader.SizeOfUninitializedData;
|
||||
}
|
||||
}
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
if (size > 0) {
|
||||
// change memory access flags
|
||||
if (VirtualProtect((LPVOID)section->Misc.PhysicalAddress, section->SizeOfRawData, protect, &oldProtect) == 0)
|
||||
#if DEBUG_OUTPUT
|
||||
if (VirtualProtect((LPVOID)((POINTER_TYPE)section->Misc.PhysicalAddress | imageOffset), size, protect, &oldProtect) == 0)
|
||||
#ifdef DEBUG_OUTPUT
|
||||
OutputLastError("Error protecting memory page")
|
||||
#endif
|
||||
;
|
||||
}
|
||||
}
|
||||
#ifndef _WIN64
|
||||
#undef imageOffset
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
|
||||
PerformBaseRelocation(PMEMORYMODULE module, SIZE_T delta)
|
||||
{
|
||||
DWORD i;
|
||||
unsigned char *codeBase = module->codeBase;
|
||||
|
||||
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_BASERELOC);
|
||||
if (directory->Size > 0)
|
||||
{
|
||||
PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION)(codeBase + directory->VirtualAddress);
|
||||
for (; relocation->VirtualAddress > 0; )
|
||||
{
|
||||
unsigned char *dest = (unsigned char *)(codeBase + relocation->VirtualAddress);
|
||||
if (directory->Size > 0) {
|
||||
PIMAGE_BASE_RELOCATION relocation = (PIMAGE_BASE_RELOCATION) (codeBase + directory->VirtualAddress);
|
||||
for (; relocation->VirtualAddress > 0; ) {
|
||||
unsigned char *dest = codeBase + relocation->VirtualAddress;
|
||||
unsigned short *relInfo = (unsigned short *)((unsigned char *)relocation + IMAGE_SIZEOF_BASE_RELOCATION);
|
||||
for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++)
|
||||
{
|
||||
for (i=0; i<((relocation->SizeOfBlock-IMAGE_SIZEOF_BASE_RELOCATION) / 2); i++, relInfo++) {
|
||||
DWORD *patchAddrHL;
|
||||
#ifdef _WIN64
|
||||
ULONGLONG *patchAddr64;
|
||||
#endif
|
||||
int type, offset;
|
||||
|
||||
// the upper 4 bits define the type of relocation
|
||||
@ -330,10 +211,17 @@ PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
|
||||
|
||||
case IMAGE_REL_BASED_HIGHLOW:
|
||||
// change complete 32 bit address
|
||||
patchAddrHL = (DWORD *)(dest + offset);
|
||||
*patchAddrHL += delta;
|
||||
patchAddrHL = (DWORD *) (dest + offset);
|
||||
*patchAddrHL += (DWORD)delta;
|
||||
break;
|
||||
|
||||
#ifdef _WIN64
|
||||
case IMAGE_REL_BASED_DIR64:
|
||||
patchAddr64 = (ULONGLONG *) (dest + offset);
|
||||
*patchAddr64 += delta;
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
//printf("Unknown relocation: %d\n", type);
|
||||
break;
|
||||
@ -341,7 +229,7 @@ PerformBaseRelocation(PMEMORYMODULE module, DWORD delta)
|
||||
}
|
||||
|
||||
// advance to next relocation block
|
||||
relocation = (PIMAGE_BASE_RELOCATION)(((DWORD)relocation) + relocation->SizeOfBlock);
|
||||
relocation = (PIMAGE_BASE_RELOCATION) (((char *) relocation) + relocation->SizeOfBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -353,18 +241,13 @@ BuildImportTable(PMEMORYMODULE module)
|
||||
unsigned char *codeBase = module->codeBase;
|
||||
|
||||
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_IMPORT);
|
||||
if (directory->Size > 0)
|
||||
{
|
||||
PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR)(codeBase + directory->VirtualAddress);
|
||||
for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++)
|
||||
{
|
||||
DWORD *thunkRef, *funcRef;
|
||||
HMODULE handle;
|
||||
|
||||
handle = MyLoadLibrary(codeBase + importDesc->Name);
|
||||
if (handle == INVALID_HANDLE_VALUE)
|
||||
{
|
||||
//LastError should already be set
|
||||
if (directory->Size > 0) {
|
||||
PIMAGE_IMPORT_DESCRIPTOR importDesc = (PIMAGE_IMPORT_DESCRIPTOR) (codeBase + directory->VirtualAddress);
|
||||
for (; !IsBadReadPtr(importDesc, sizeof(IMAGE_IMPORT_DESCRIPTOR)) && importDesc->Name; importDesc++) {
|
||||
POINTER_TYPE *thunkRef;
|
||||
FARPROC *funcRef;
|
||||
HMODULE handle = LoadLibrary((LPCSTR) (codeBase + importDesc->Name));
|
||||
if (handle == NULL) {
|
||||
#if DEBUG_OUTPUT
|
||||
OutputLastError("Can't load library");
|
||||
#endif
|
||||
@ -373,81 +256,54 @@ BuildImportTable(PMEMORYMODULE module)
|
||||
}
|
||||
|
||||
module->modules = (HMODULE *)realloc(module->modules, (module->numModules+1)*(sizeof(HMODULE)));
|
||||
if (module->modules == NULL)
|
||||
{
|
||||
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
|
||||
if (module->modules == NULL) {
|
||||
result = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
module->modules[module->numModules++] = handle;
|
||||
if (importDesc->OriginalFirstThunk)
|
||||
{
|
||||
thunkRef = (DWORD *)(codeBase + importDesc->OriginalFirstThunk);
|
||||
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
|
||||
if (importDesc->OriginalFirstThunk) {
|
||||
thunkRef = (POINTER_TYPE *) (codeBase + importDesc->OriginalFirstThunk);
|
||||
funcRef = (FARPROC *) (codeBase + importDesc->FirstThunk);
|
||||
} else {
|
||||
// no hint table
|
||||
thunkRef = (DWORD *)(codeBase + importDesc->FirstThunk);
|
||||
funcRef = (DWORD *)(codeBase + importDesc->FirstThunk);
|
||||
thunkRef = (POINTER_TYPE *) (codeBase + importDesc->FirstThunk);
|
||||
funcRef = (FARPROC *) (codeBase + importDesc->FirstThunk);
|
||||
}
|
||||
for (; *thunkRef; thunkRef++, funcRef++)
|
||||
{
|
||||
if IMAGE_SNAP_BY_ORDINAL(*thunkRef) {
|
||||
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
|
||||
for (; *thunkRef; thunkRef++, funcRef++) {
|
||||
if (IMAGE_SNAP_BY_ORDINAL(*thunkRef)) {
|
||||
*funcRef = (FARPROC)GetProcAddress(handle, (LPCSTR)IMAGE_ORDINAL(*thunkRef));
|
||||
} else {
|
||||
PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME)(codeBase + *thunkRef);
|
||||
*funcRef = (DWORD)MyGetProcAddress(handle, (LPCSTR)&thunkData->Name);
|
||||
PIMAGE_IMPORT_BY_NAME thunkData = (PIMAGE_IMPORT_BY_NAME) (codeBase + (*thunkRef));
|
||||
*funcRef = (FARPROC)GetProcAddress(handle, (LPCSTR)&thunkData->Name);
|
||||
}
|
||||
if (*funcRef == 0)
|
||||
{
|
||||
SetLastError(ERROR_PROC_NOT_FOUND);
|
||||
if (*funcRef == 0) {
|
||||
result = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!result)
|
||||
if (!result) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
MemoryLoadLibrary - load a library AS MEMORY MODULE, or return
|
||||
existing MEMORY MODULE with increased refcount.
|
||||
|
||||
This allows to load a library AGAIN as memory module which is
|
||||
already loaded as HMODULE!
|
||||
|
||||
*/
|
||||
HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
|
||||
HMEMORYMODULE MemoryLoadLibrary(const void *data)
|
||||
{
|
||||
PMEMORYMODULE result;
|
||||
PIMAGE_DOS_HEADER dos_header;
|
||||
PIMAGE_NT_HEADERS old_header;
|
||||
unsigned char *code, *headers;
|
||||
DWORD locationDelta;
|
||||
SIZE_T locationDelta;
|
||||
DllEntryProc DllEntry;
|
||||
BOOL successfull;
|
||||
MEMORYMODULE *p = loaded;
|
||||
|
||||
while (p) {
|
||||
// If already loaded, only increment the reference count
|
||||
if (0 == stricmp(name, p->name)) {
|
||||
p->refcount++;
|
||||
return (HMODULE)p;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
|
||||
/* Do NOT check for GetModuleHandle here! */
|
||||
|
||||
dos_header = (PIMAGE_DOS_HEADER)data;
|
||||
if (dos_header->e_magic != IMAGE_DOS_SIGNATURE)
|
||||
{
|
||||
SetLastError(ERROR_BAD_FORMAT);
|
||||
if (dos_header->e_magic != IMAGE_DOS_SIGNATURE) {
|
||||
#if DEBUG_OUTPUT
|
||||
OutputDebugString("Not a valid executable file.\n");
|
||||
#endif
|
||||
@ -455,9 +311,7 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
|
||||
}
|
||||
|
||||
old_header = (PIMAGE_NT_HEADERS)&((const unsigned char *)(data))[dos_header->e_lfanew];
|
||||
if (old_header->Signature != IMAGE_NT_SIGNATURE)
|
||||
{
|
||||
SetLastError(ERROR_BAD_FORMAT);
|
||||
if (old_header->Signature != IMAGE_NT_SIGNATURE) {
|
||||
#if DEBUG_OUTPUT
|
||||
OutputDebugString("No PE header found.\n");
|
||||
#endif
|
||||
@ -470,31 +324,25 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
|
||||
MEM_RESERVE,
|
||||
PAGE_READWRITE);
|
||||
|
||||
if (code == NULL)
|
||||
if (code == NULL) {
|
||||
// try to allocate memory at arbitrary position
|
||||
code = (unsigned char *)VirtualAlloc(NULL,
|
||||
old_header->OptionalHeader.SizeOfImage,
|
||||
MEM_RESERVE,
|
||||
PAGE_READWRITE);
|
||||
|
||||
if (code == NULL)
|
||||
{
|
||||
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
|
||||
if (code == NULL) {
|
||||
#if DEBUG_OUTPUT
|
||||
OutputLastError("Can't reserve memory");
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
result = (PMEMORYMODULE)HeapAlloc(GetProcessHeap(), 0, sizeof(MEMORYMODULE));
|
||||
result->codeBase = code;
|
||||
result->numModules = 0;
|
||||
result->modules = NULL;
|
||||
result->initialized = 0;
|
||||
result->next = result->prev = NULL;
|
||||
result->refcount = 1;
|
||||
result->name = strdup(name);
|
||||
result->name_table = NULL;
|
||||
|
||||
// XXX: is it correct to commit the complete memory region at once?
|
||||
// calling DllEntry raises an exception if we don't...
|
||||
@ -514,31 +362,30 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
|
||||
result->headers = (PIMAGE_NT_HEADERS)&((const unsigned char *)(headers))[dos_header->e_lfanew];
|
||||
|
||||
// update position
|
||||
result->headers->OptionalHeader.ImageBase = (DWORD)code;
|
||||
result->headers->OptionalHeader.ImageBase = (POINTER_TYPE)code;
|
||||
|
||||
// copy sections from DLL file block to new memory location
|
||||
CopySections(data, old_header, result);
|
||||
|
||||
// adjust base address of imported data
|
||||
locationDelta = (DWORD)(code - old_header->OptionalHeader.ImageBase);
|
||||
if (locationDelta != 0)
|
||||
locationDelta = (SIZE_T)(code - old_header->OptionalHeader.ImageBase);
|
||||
if (locationDelta != 0) {
|
||||
PerformBaseRelocation(result, locationDelta);
|
||||
}
|
||||
|
||||
// load required dlls and adjust function table of imports
|
||||
if (!BuildImportTable(result))
|
||||
if (!BuildImportTable(result)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
// mark memory pages depending on section headers and release
|
||||
// sections that are marked as "discardable"
|
||||
FinalizeSections(result);
|
||||
|
||||
// get entry point of loaded library
|
||||
if (result->headers->OptionalHeader.AddressOfEntryPoint != 0)
|
||||
{
|
||||
DllEntry = (DllEntryProc)(code + result->headers->OptionalHeader.AddressOfEntryPoint);
|
||||
if (DllEntry == 0)
|
||||
{
|
||||
SetLastError(ERROR_BAD_FORMAT); /* XXX ? */
|
||||
if (result->headers->OptionalHeader.AddressOfEntryPoint != 0) {
|
||||
DllEntry = (DllEntryProc) (code + result->headers->OptionalHeader.AddressOfEntryPoint);
|
||||
if (DllEntry == 0) {
|
||||
#if DEBUG_OUTPUT
|
||||
OutputDebugString("Library has no entry point.\n");
|
||||
#endif
|
||||
@ -547,8 +394,7 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
|
||||
|
||||
// notify library about attaching to process
|
||||
successfull = (*DllEntry)((HINSTANCE)code, DLL_PROCESS_ATTACH, 0);
|
||||
if (!successfull)
|
||||
{
|
||||
if (!successfull) {
|
||||
#if DEBUG_OUTPUT
|
||||
OutputDebugString("Can't attach library.\n");
|
||||
#endif
|
||||
@ -557,99 +403,55 @@ HMEMORYMODULE MemoryLoadLibrary(char *name, const void *data)
|
||||
result->initialized = 1;
|
||||
}
|
||||
|
||||
_Register(name, result);
|
||||
|
||||
return (HMEMORYMODULE)result;
|
||||
|
||||
error:
|
||||
// cleanup
|
||||
free(result->name);
|
||||
MemoryFreeLibrary(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int _compare(const struct NAME_TABLE *p1, const struct NAME_TABLE *p2)
|
||||
{
|
||||
return stricmp(p1->name, p2->name);
|
||||
}
|
||||
|
||||
int _find(const char **name, const struct NAME_TABLE *p)
|
||||
{
|
||||
return stricmp(*name, p->name);
|
||||
}
|
||||
|
||||
struct NAME_TABLE *GetNameTable(PMEMORYMODULE module)
|
||||
{
|
||||
unsigned char *codeBase;
|
||||
PIMAGE_EXPORT_DIRECTORY exports;
|
||||
PIMAGE_DATA_DIRECTORY directory;
|
||||
DWORD i, *nameRef;
|
||||
WORD *ordinal;
|
||||
struct NAME_TABLE *p, *ptab;
|
||||
|
||||
if (module->name_table)
|
||||
return module->name_table;
|
||||
|
||||
codeBase = module->codeBase;
|
||||
directory = GET_HEADER_DICTIONARY(module, IMAGE_DIRECTORY_ENTRY_EXPORT);
|
||||
exports = (PIMAGE_EXPORT_DIRECTORY)(codeBase + directory->VirtualAddress);
|
||||
|
||||
nameRef = (DWORD *)(codeBase + exports->AddressOfNames);
|
||||
ordinal = (WORD *)(codeBase + exports->AddressOfNameOrdinals);
|
||||
|
||||
p = ((PMEMORYMODULE)module)->name_table = (struct NAME_TABLE *)malloc(sizeof(struct NAME_TABLE)
|
||||
* exports->NumberOfNames);
|
||||
if (p == NULL)
|
||||
return NULL;
|
||||
ptab = p;
|
||||
for (i=0; i<exports->NumberOfNames; ++i) {
|
||||
p->name = (char *)(codeBase + *nameRef++);
|
||||
p->ordinal = *ordinal++;
|
||||
++p;
|
||||
}
|
||||
qsort(ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _compare);
|
||||
return ptab;
|
||||
}
|
||||
|
||||
FARPROC MemoryGetProcAddress(HMEMORYMODULE module, const char *name)
|
||||
{
|
||||
unsigned char *codeBase = ((PMEMORYMODULE)module)->codeBase;
|
||||
int idx=-1;
|
||||
DWORD i, *nameRef;
|
||||
WORD *ordinal;
|
||||
PIMAGE_EXPORT_DIRECTORY exports;
|
||||
PIMAGE_DATA_DIRECTORY directory = GET_HEADER_DICTIONARY((PMEMORYMODULE)module, IMAGE_DIRECTORY_ENTRY_EXPORT);
|
||||
|
||||
if (directory->Size == 0)
|
||||
if (directory->Size == 0) {
|
||||
// no export table found
|
||||
return NULL;
|
||||
}
|
||||
|
||||
exports = (PIMAGE_EXPORT_DIRECTORY)(codeBase + directory->VirtualAddress);
|
||||
if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0)
|
||||
exports = (PIMAGE_EXPORT_DIRECTORY) (codeBase + directory->VirtualAddress);
|
||||
if (exports->NumberOfNames == 0 || exports->NumberOfFunctions == 0) {
|
||||
// DLL doesn't export anything
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (HIWORD(name)) {
|
||||
struct NAME_TABLE *ptab;
|
||||
struct NAME_TABLE *found;
|
||||
ptab = GetNameTable((PMEMORYMODULE)module);
|
||||
if (ptab == NULL)
|
||||
// some failure
|
||||
return NULL;
|
||||
found = bsearch(&name, ptab, exports->NumberOfNames, sizeof(struct NAME_TABLE), _find);
|
||||
if (found == NULL)
|
||||
// search function name in list of exported names
|
||||
nameRef = (DWORD *) (codeBase + exports->AddressOfNames);
|
||||
ordinal = (WORD *) (codeBase + exports->AddressOfNameOrdinals);
|
||||
for (i=0; i<exports->NumberOfNames; i++, nameRef++, ordinal++) {
|
||||
if (_stricmp(name, (const char *) (codeBase + (*nameRef))) == 0) {
|
||||
idx = *ordinal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (idx == -1) {
|
||||
// exported symbol not found
|
||||
return NULL;
|
||||
|
||||
idx = found->ordinal;
|
||||
}
|
||||
else
|
||||
idx = LOWORD(name) - exports->Base;
|
||||
|
||||
if ((DWORD)idx > exports->NumberOfFunctions)
|
||||
if ((DWORD)idx > exports->NumberOfFunctions) {
|
||||
// name <-> ordinal number don't match
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// AddressOfFunctions contains the RVAs to the "real" functions
|
||||
return (FARPROC)(codeBase + *(DWORD *)(codeBase + exports->AddressOfFunctions + (idx*4)));
|
||||
return (FARPROC) (codeBase + (*(DWORD *) (codeBase + exports->AddressOfFunctions + (idx*4))));
|
||||
}
|
||||
|
||||
void MemoryFreeLibrary(HMEMORYMODULE mod)
|
||||
@ -657,32 +459,29 @@ void MemoryFreeLibrary(HMEMORYMODULE mod)
|
||||
int i;
|
||||
PMEMORYMODULE module = (PMEMORYMODULE)mod;
|
||||
|
||||
if (module != NULL)
|
||||
{
|
||||
if (module->initialized != 0)
|
||||
{
|
||||
if (module != NULL) {
|
||||
if (module->initialized != 0) {
|
||||
// notify library about detaching from process
|
||||
DllEntryProc DllEntry = (DllEntryProc)(module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint);
|
||||
DllEntryProc DllEntry = (DllEntryProc) (module->codeBase + module->headers->OptionalHeader.AddressOfEntryPoint);
|
||||
(*DllEntry)((HINSTANCE)module->codeBase, DLL_PROCESS_DETACH, 0);
|
||||
module->initialized = 0;
|
||||
}
|
||||
|
||||
if (module->modules != NULL)
|
||||
{
|
||||
if (module->modules != NULL) {
|
||||
// free previously opened libraries
|
||||
for (i=0; i<module->numModules; i++)
|
||||
if (module->modules[i] != INVALID_HANDLE_VALUE)
|
||||
MyFreeLibrary(module->modules[i]);
|
||||
for (i=0; i<module->numModules; i++) {
|
||||
if (module->modules[i] != INVALID_HANDLE_VALUE) {
|
||||
FreeLibrary(module->modules[i]);
|
||||
}
|
||||
}
|
||||
|
||||
free(module->modules);
|
||||
}
|
||||
|
||||
if (module->codeBase != NULL)
|
||||
if (module->codeBase != NULL) {
|
||||
// release memory of library
|
||||
VirtualFree(module->codeBase, 0, MEM_RELEASE);
|
||||
|
||||
if (module->name_table != NULL)
|
||||
free(module->name_table);
|
||||
}
|
||||
|
||||
HeapFree(GetProcessHeap(), 0, module);
|
||||
}
|
||||
|
@ -1,12 +1,12 @@
|
||||
/*
|
||||
* Memory DLL loading code
|
||||
* Version 0.0.2
|
||||
* Version 0.0.3
|
||||
*
|
||||
* Copyright (c) 2004-2005 by Joachim Bauch / mail@joachim-bauch.de
|
||||
* Copyright (c) 2004-2012 by Joachim Bauch / mail@joachim-bauch.de
|
||||
* http://www.joachim-bauch.de
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* 2.0 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
@ -19,7 +19,7 @@
|
||||
*
|
||||
* The Initial Developer of the Original Code is Joachim Bauch.
|
||||
*
|
||||
* Portions created by Joachim Bauch are Copyright (C) 2004-2005
|
||||
* Portions created by Joachim Bauch are Copyright (C) 2004-2012
|
||||
* Joachim Bauch. All Rights Reserved.
|
||||
*
|
||||
*/
|
||||
@ -35,22 +35,12 @@ typedef void *HMEMORYMODULE;
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef void *(*FINDPROC)();
|
||||
|
||||
extern FINDPROC findproc;
|
||||
extern void *findproc_data;
|
||||
|
||||
HMEMORYMODULE MemoryLoadLibrary(char *, const void *);
|
||||
HMEMORYMODULE MemoryLoadLibrary(const void *);
|
||||
|
||||
FARPROC MemoryGetProcAddress(HMEMORYMODULE, const char *);
|
||||
|
||||
void MemoryFreeLibrary(HMEMORYMODULE);
|
||||
|
||||
BOOL MyFreeLibrary(HMODULE hModule);
|
||||
HMODULE MyLoadLibrary(char *lpFileName);
|
||||
FARPROC MyGetProcAddress(HMODULE hModule, LPCSTR lpProcName);
|
||||
HMODULE MyGetModuleHandle(LPCTSTR lpModuleName);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -8,53 +8,66 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, shutil, subprocess
|
||||
|
||||
from setup import Command, __appname__, __version__
|
||||
from setup import Command, __appname__, __version__, installer_name
|
||||
from setup.installer import VMInstaller
|
||||
|
||||
class Win(Command):
|
||||
|
||||
description = 'Build windows binary installers'
|
||||
|
||||
sub_commands = ['win32']
|
||||
sub_commands = ['win64', 'win32']
|
||||
|
||||
def run(self, opts):
|
||||
pass
|
||||
|
||||
|
||||
class Win32(VMInstaller):
|
||||
|
||||
description = 'Build 32bit windows binary installer'
|
||||
|
||||
INSTALLER_EXT = 'exe'
|
||||
VM_NAME = 'xp_build'
|
||||
VM = '/vmware/bin/%s'%VM_NAME
|
||||
VM_CHECK = 'calibre_windows_xp_home'
|
||||
class WinBase(VMInstaller):
|
||||
FREEZE_COMMAND = 'win32_freeze'
|
||||
FREEZE_TEMPLATE = 'python -OO setup.py {freeze_command} --no-ice'
|
||||
INSTALLER_EXT = 'msi'
|
||||
SHUTDOWN_CMD = ['shutdown.exe', '-s', '-f', '-t', '0']
|
||||
|
||||
def sign_msi(self):
|
||||
print ('Signing installers ...')
|
||||
subprocess.check_call(['ssh', self.VM_NAME, '~/sign.sh'], shell=False)
|
||||
|
||||
class Win32(WinBase):
|
||||
|
||||
description = 'Build 32bit windows binary installer'
|
||||
|
||||
VM_NAME = 'xp_build'
|
||||
VM = '/vmware/bin/%s'%VM_NAME
|
||||
VM_CHECK = 'calibre_windows_xp_home'
|
||||
|
||||
@property
|
||||
def msi64(self):
|
||||
return installer_name('msi', is64bit=True)
|
||||
|
||||
def do_dl(self, installer, errmsg):
|
||||
subprocess.check_call(('scp',
|
||||
'%s:build/%s/%s'%(self.VM_NAME, __appname__, installer), 'dist'))
|
||||
if not os.path.exists(installer):
|
||||
self.warn(errmsg)
|
||||
raise SystemExit(1)
|
||||
|
||||
def download_installer(self):
|
||||
installer = self.installer()
|
||||
if os.path.exists('build/winfrozen'):
|
||||
shutil.rmtree('build/winfrozen')
|
||||
self.sign_msi()
|
||||
|
||||
subprocess.check_call(('scp',
|
||||
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
|
||||
if not os.path.exists(installer):
|
||||
self.warn('Failed to freeze')
|
||||
raise SystemExit(1)
|
||||
|
||||
self.do_dl(installer, 'Failed to freeze')
|
||||
installer = 'dist/%s-portable-installer-%s.exe'%(__appname__, __version__)
|
||||
subprocess.check_call(('scp',
|
||||
'xp_build:build/%s/%s'%(__appname__, installer), 'dist'))
|
||||
if not os.path.exists(installer):
|
||||
self.warn('Failed to get portable installer')
|
||||
raise SystemExit(1)
|
||||
self.do_dl(installer, 'Failed to get portable installer')
|
||||
|
||||
class Win64(WinBase):
|
||||
|
||||
description = 'Build 64bit windows binary installer'
|
||||
|
||||
VM_NAME = 'win64'
|
||||
VM = '/vmware/bin/%s'%VM_NAME
|
||||
VM_CHECK = 'win64'
|
||||
IS_64_BIT = True
|
||||
BUILD_PREFIX = WinBase.BUILD_PREFIX + [
|
||||
'if [ -f "$HOME/.bash_profile" ] ; then',
|
||||
' source "$HOME/.bash_profile"',
|
||||
'fi',
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
@ -25,6 +25,7 @@ LZMA = r'Q:\easylzma\build\easylzma-0.0.8'
|
||||
|
||||
VERSION = re.sub('[a-z]\d+', '', __version__)
|
||||
WINVER = VERSION+'.0'
|
||||
machine = 'X64' if is64bit else 'X86'
|
||||
|
||||
DESCRIPTIONS = {
|
||||
'calibre' : 'The main calibre program',
|
||||
@ -90,6 +91,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
if not is64bit:
|
||||
self.build_portable()
|
||||
self.build_portable_installer()
|
||||
self.sign_installers()
|
||||
|
||||
def remove_CRT_from_manifests(self):
|
||||
'''
|
||||
@ -110,7 +112,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
self.info('Removing CRT dependency from manifest of: %s'%bn)
|
||||
# Blank out the bytes corresponding to the dependency specification
|
||||
nraw = repl_pat.sub(lambda m: b' '*len(m.group()), raw)
|
||||
if len(nraw) != len(raw):
|
||||
if len(nraw) != len(raw) or nraw == raw:
|
||||
raise Exception('Something went wrong with %s'%bn)
|
||||
with open(dll, 'wb') as f:
|
||||
f.write(nraw)
|
||||
@ -132,6 +134,23 @@ class Win32Freeze(Command, WixMixIn):
|
||||
# used instead
|
||||
shutil.copy2(f, tgt)
|
||||
|
||||
def fix_pyd_bootstraps_in(self, folder):
|
||||
for dirpath, dirnames, filenames in os.walk(folder):
|
||||
for f in filenames:
|
||||
name, ext = os.path.splitext(f)
|
||||
bpy = self.j(dirpath, name + '.py')
|
||||
if ext == '.pyd' and os.path.exists(bpy):
|
||||
with open(bpy, 'rb') as f:
|
||||
raw = f.read().strip()
|
||||
if (not raw.startswith('def __bootstrap__') or not
|
||||
raw.endswith('__bootstrap__()')):
|
||||
raise Exception('The file %r has non'
|
||||
' bootstrap code'%self.j(dirpath, f))
|
||||
for ext in ('.py', '.pyc', '.pyo'):
|
||||
x = self.j(dirpath, name+ext)
|
||||
if os.path.exists(x):
|
||||
os.remove(x)
|
||||
|
||||
def freeze(self):
|
||||
shutil.copy2(self.j(self.src_root, 'LICENSE'), self.base)
|
||||
|
||||
@ -184,23 +203,12 @@ class Win32Freeze(Command, WixMixIn):
|
||||
shutil.copytree(self.j(comext, 'shell'), self.j(sp_dir, 'win32com', 'shell'))
|
||||
shutil.rmtree(comext)
|
||||
|
||||
# Fix PyCrypto, removing the bootstrap .py modules that load the .pyd
|
||||
# modules, since they do not work when in a zip file
|
||||
for crypto_dir in glob.glob(self.j(sp_dir, 'pycrypto-*', 'Crypto')):
|
||||
for dirpath, dirnames, filenames in os.walk(crypto_dir):
|
||||
for f in filenames:
|
||||
name, ext = os.path.splitext(f)
|
||||
if ext == '.pyd':
|
||||
with open(self.j(dirpath, name+'.py')) as f:
|
||||
raw = f.read().strip()
|
||||
if (not raw.startswith('def __bootstrap__') or not
|
||||
raw.endswith('__bootstrap__()')):
|
||||
raise Exception('The PyCrypto file %r has non'
|
||||
' bootstrap code'%self.j(dirpath, f))
|
||||
for ext in ('.py', '.pyc', '.pyo'):
|
||||
x = self.j(dirpath, name+ext)
|
||||
if os.path.exists(x):
|
||||
os.remove(x)
|
||||
# Fix PyCrypto and Pillow, removing the bootstrap .py modules that load
|
||||
# the .pyd modules, since they do not work when in a zip file
|
||||
for folder in os.listdir(sp_dir):
|
||||
folder = self.j(sp_dir, folder)
|
||||
if os.path.isdir(folder):
|
||||
self.fix_pyd_bootstraps_in(folder)
|
||||
|
||||
for pat in (r'PyQt4\uic\port_v3', ):
|
||||
x = glob.glob(self.j(self.lib_dir, 'site-packages', pat))[0]
|
||||
@ -367,7 +375,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
if not self.opts.keep_site:
|
||||
os.remove(y)
|
||||
|
||||
def run_builder(self, cmd):
|
||||
def run_builder(self, cmd, show_output=False):
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
if p.wait() != 0:
|
||||
@ -376,6 +384,9 @@ class Win32Freeze(Command, WixMixIn):
|
||||
self.info(p.stdout.read())
|
||||
self.info(p.stderr.read())
|
||||
sys.exit(1)
|
||||
if show_output:
|
||||
self.info(p.stdout.read())
|
||||
self.info(p.stderr.read())
|
||||
|
||||
def build_portable_installer(self):
|
||||
zf = self.a(self.j('dist', 'calibre-portable-%s.zip.lz'%VERSION))
|
||||
@ -401,7 +412,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
exe = self.j('dist', 'calibre-portable-installer-%s.exe'%VERSION)
|
||||
if self.newer(exe, [obj, xobj]):
|
||||
self.info('Linking', exe)
|
||||
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
|
||||
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:'+machine,
|
||||
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
|
||||
'/LIBPATH:'+(LZMA+r'\lib\Release'),
|
||||
'/RELEASE', '/MANIFEST', '/MANIFESTUAC:level="asInvoker" uiAccess="false"',
|
||||
@ -458,7 +469,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
exe = self.j(base, 'calibre-portable.exe')
|
||||
if self.newer(exe, [obj]):
|
||||
self.info('Linking', exe)
|
||||
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
|
||||
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:'+machine,
|
||||
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:WINDOWS',
|
||||
'/RELEASE',
|
||||
'/ENTRY:wWinMainCRTStartup',
|
||||
@ -478,6 +489,17 @@ class Win32Freeze(Command, WixMixIn):
|
||||
|
||||
subprocess.check_call([LZMA + r'\bin\elzma.exe', '-9', '--lzip', name])
|
||||
|
||||
def sign_installers(self):
|
||||
self.info('Signing installers...')
|
||||
files = glob.glob(self.j('dist', '*.msi')) + glob.glob(self.j('dist',
|
||||
'*.exe'))
|
||||
if not files:
|
||||
raise ValueError('No installers found')
|
||||
subprocess.check_call(['signtool.exe', 'sign', '/a', '/d',
|
||||
'calibre - E-book management', '/du',
|
||||
'http://calibre-ebook.com', '/t',
|
||||
'http://timestamp.verisign.com/scripts/timstamp.dll'] + files)
|
||||
|
||||
def add_dir_to_zip(self, zf, path, prefix=''):
|
||||
'''
|
||||
Add a directory recursively to the zip file with an optional prefix.
|
||||
@ -499,9 +521,11 @@ class Win32Freeze(Command, WixMixIn):
|
||||
finally:
|
||||
os.chdir(cwd)
|
||||
|
||||
def build_launchers(self):
|
||||
def build_launchers(self, debug=False):
|
||||
if not os.path.exists(self.obj_dir):
|
||||
os.makedirs(self.obj_dir)
|
||||
dflags = (['/Zi'] if debug else [])
|
||||
dlflags = (['/DEBUG'] if debug else ['/INCREMENTAL:NO'])
|
||||
base = self.j(self.src_root, 'setup', 'installer', 'windows')
|
||||
sources = [self.j(base, x) for x in ['util.c', 'MemoryModule.c']]
|
||||
headers = [self.j(base, x) for x in ['util.h', 'MemoryModule.h']]
|
||||
@ -510,20 +534,20 @@ class Win32Freeze(Command, WixMixIn):
|
||||
cflags += ['/DPYDLL="python%s.dll"'%self.py_ver, '/IC:/Python%s/include'%self.py_ver]
|
||||
for src, obj in zip(sources, objects):
|
||||
if not self.newer(obj, headers+[src]): continue
|
||||
cmd = [msvc.cc] + cflags + ['/Fo'+obj, '/Tc'+src]
|
||||
self.run_builder(cmd)
|
||||
cmd = [msvc.cc] + cflags + dflags + ['/Fo'+obj, '/Tc'+src]
|
||||
self.run_builder(cmd, show_output=True)
|
||||
|
||||
dll = self.j(self.obj_dir, 'calibre-launcher.dll')
|
||||
ver = '.'.join(__version__.split('.')[:2])
|
||||
if self.newer(dll, objects):
|
||||
cmd = [msvc.linker, '/DLL', '/INCREMENTAL:NO', '/VERSION:'+ver,
|
||||
'/OUT:'+dll, '/nologo', '/MACHINE:X86'] + objects + \
|
||||
cmd = [msvc.linker, '/DLL', '/VERSION:'+ver, '/OUT:'+dll,
|
||||
'/nologo', '/MACHINE:'+machine] + dlflags + objects + \
|
||||
[self.embed_resources(dll),
|
||||
'/LIBPATH:C:/Python%s/libs'%self.py_ver,
|
||||
'python%s.lib'%self.py_ver,
|
||||
'/delayload:python%s.dll'%self.py_ver]
|
||||
self.info('Linking calibre-launcher.dll')
|
||||
self.run_builder(cmd)
|
||||
self.run_builder(cmd, show_output=True)
|
||||
|
||||
src = self.j(base, 'main.c')
|
||||
shutil.copy2(dll, self.base)
|
||||
@ -541,16 +565,16 @@ class Win32Freeze(Command, WixMixIn):
|
||||
dest = self.j(self.obj_dir, bname+'.obj')
|
||||
if self.newer(dest, [src]+headers):
|
||||
self.info('Compiling', bname)
|
||||
cmd = [msvc.cc] + xflags + ['/Tc'+src, '/Fo'+dest]
|
||||
cmd = [msvc.cc] + xflags + dflags + ['/Tc'+src, '/Fo'+dest]
|
||||
self.run_builder(cmd)
|
||||
exe = self.j(self.base, bname+'.exe')
|
||||
lib = dll.replace('.dll', '.lib')
|
||||
if self.newer(exe, [dest, lib, self.rc_template, __file__]):
|
||||
self.info('Linking', bname)
|
||||
cmd = [msvc.linker] + ['/INCREMENTAL:NO', '/MACHINE:X86',
|
||||
cmd = [msvc.linker] + ['/MACHINE:'+machine,
|
||||
'/LIBPATH:'+self.obj_dir, '/SUBSYSTEM:'+subsys,
|
||||
'/LIBPATH:C:/Python%s/libs'%self.py_ver, '/RELEASE',
|
||||
'/OUT:'+exe, self.embed_resources(exe),
|
||||
'/OUT:'+exe] + dlflags + [self.embed_resources(exe),
|
||||
dest, lib]
|
||||
self.run_builder(cmd)
|
||||
|
||||
@ -563,12 +587,18 @@ class Win32Freeze(Command, WixMixIn):
|
||||
for x in (self.plugins_dir, self.dll_dir):
|
||||
for pyd in os.listdir(x):
|
||||
if pyd.endswith('.pyd') and pyd not in {
|
||||
'unrar.pyd', 'sqlite_custom.pyd', 'calibre_style.pyd'}:
|
||||
# sqlite_custom has to be a file for
|
||||
# sqlite_load_extension to work
|
||||
# For some reason unrar.pyd crashes when processing
|
||||
# password protected RAR files if loaded from inside
|
||||
# pylib.zip
|
||||
'sqlite_custom.pyd',
|
||||
# calibre_style has to be loaded by Qt therefore it
|
||||
# must be a file
|
||||
'calibre_style.pyd',
|
||||
# Because of https://github.com/fancycode/MemoryModule/issues/4
|
||||
# any extensions that use C++ exceptions must be loaded
|
||||
# from files
|
||||
'unrar.pyd', 'wpd.pyd', 'podofo.pyd',
|
||||
'progress_indicator.pyd',
|
||||
}:
|
||||
self.add_to_zipfile(zf, pyd, x)
|
||||
os.remove(self.j(x, pyd))
|
||||
|
||||
@ -581,6 +611,7 @@ class Win32Freeze(Command, WixMixIn):
|
||||
sp = self.j(self.lib_dir, 'site-packages')
|
||||
# Special handling for PIL and pywin32
|
||||
handled = set(['PIL.pth', 'pywin32.pth', 'PIL', 'win32'])
|
||||
if not is64bit:
|
||||
self.add_to_zipfile(zf, 'PIL', sp)
|
||||
base = self.j(sp, 'win32', 'lib')
|
||||
for x in os.listdir(base):
|
||||
@ -593,16 +624,17 @@ class Win32Freeze(Command, WixMixIn):
|
||||
self.add_to_zipfile(zf, x, base)
|
||||
|
||||
handled.add('easy-install.pth')
|
||||
# We don't want the site.py from site-packages
|
||||
handled.add('site.pyo')
|
||||
|
||||
for d in self.get_pth_dirs(self.j(sp, 'easy-install.pth')):
|
||||
handled.add(self.b(d))
|
||||
for x in os.listdir(d):
|
||||
if x == 'EGG-INFO':
|
||||
if x in {'EGG-INFO', 'site.py', 'site.pyc', 'site.pyo'}:
|
||||
continue
|
||||
self.add_to_zipfile(zf, x, d)
|
||||
|
||||
# The rest of site-packages
|
||||
# We don't want the site.py from site-packages
|
||||
handled.add('site.pyo')
|
||||
for x in os.listdir(sp):
|
||||
if x in handled or x.endswith('.egg-info'):
|
||||
continue
|
||||
@ -622,8 +654,10 @@ class Win32Freeze(Command, WixMixIn):
|
||||
line = line.strip()
|
||||
if not line or line.startswith('#') or line.startswith('import'):
|
||||
continue
|
||||
candidate = self.j(base, line)
|
||||
candidate = os.path.abspath(self.j(base, line))
|
||||
if os.path.exists(candidate):
|
||||
if not os.path.isdir(candidate):
|
||||
raise ValueError('%s is not a directory'%candidate)
|
||||
yield candidate
|
||||
|
||||
def add_to_zipfile(self, zf, name, base, exclude=frozenset()):
|
||||
|
@ -109,10 +109,8 @@ of mimetypes from the windows registry
|
||||
Python packages
|
||||
------------------
|
||||
|
||||
Install setuptools from http://pypi.python.org/pypi/setuptools If there are no
|
||||
windows binaries already compiled for the version of python you are using then
|
||||
download the source and run the following command in the folder where the
|
||||
source has been unpacked::
|
||||
Install setuptools from http://pypi.python.org/pypi/setuptools. Use the source
|
||||
tarball. Edit setup.py and set zip_safe=False. Then run::
|
||||
|
||||
python setup.py install
|
||||
|
||||
|
@ -418,9 +418,12 @@ static BOOL move_program() {
|
||||
}
|
||||
|
||||
if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) {
|
||||
show_last_error(L"Failed to move calibre program folder");
|
||||
Sleep(4000); // Sleep and try again
|
||||
if (MoveFileEx(L"Calibre Portable\\Calibre", L"..\\Calibre", 0) == 0) {
|
||||
show_last_error(L"Failed to move calibre program folder. This is usually caused by an antivirus program or a file sync program like DropBox. Turn them off temporarily and try again. Underlying error: ");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!directory_exists(L"..\\Calibre Library")) {
|
||||
MoveFileEx(L"Calibre Portable\\Calibre Library", L"..\\Calibre Library", 0);
|
||||
|
@ -16,6 +16,7 @@ static char python_dll[] = PYDLL;
|
||||
void set_gui_app(char yes) { GUI_APP = yes; }
|
||||
char is_gui_app() { return GUI_APP; }
|
||||
|
||||
int calibre_show_python_error(const wchar_t *preamble, int code);
|
||||
|
||||
// memimporter {{{
|
||||
|
||||
@ -63,17 +64,6 @@ static void* FindLibrary(char *name, PyObject *callback)
|
||||
return p;
|
||||
}
|
||||
|
||||
static PyObject *set_find_proc(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *callback = NULL;
|
||||
if (!PyArg_ParseTuple(args, "|O:set_find_proc", &callback))
|
||||
return NULL;
|
||||
Py_DECREF((PyObject *)findproc_data);
|
||||
Py_INCREF(callback);
|
||||
findproc_data = (void *)callback;
|
||||
return Py_BuildValue("i", 1);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
import_module(PyObject *self, PyObject *args)
|
||||
{
|
||||
@ -92,7 +82,7 @@ import_module(PyObject *self, PyObject *args)
|
||||
&data, &size,
|
||||
&initfuncname, &modname, &pathname))
|
||||
return NULL;
|
||||
hmem = MemoryLoadLibrary(pathname, data);
|
||||
hmem = MemoryLoadLibrary(data);
|
||||
if (!hmem) {
|
||||
PyErr_Format(*DLL_ImportError,
|
||||
"MemoryLoadLibrary() failed loading %s", pathname);
|
||||
@ -119,14 +109,14 @@ import_module(PyObject *self, PyObject *args)
|
||||
static PyMethodDef methods[] = {
|
||||
{ "import_module", import_module, METH_VARARGS,
|
||||
"import_module(code, initfunc, dllname[, finder]) -> module" },
|
||||
{ "set_find_proc", set_find_proc, METH_VARARGS },
|
||||
{ NULL, NULL }, /* Sentinel */
|
||||
};
|
||||
|
||||
// }}}
|
||||
|
||||
static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int code) {
|
||||
wchar_t *buf, *cbuf;
|
||||
wchar_t *buf;
|
||||
char *cbuf;
|
||||
buf = (wchar_t*)LocalAlloc(LMEM_ZEROINIT, sizeof(wchar_t)*
|
||||
(wcslen(msg) + wcslen(preamble) + 80));
|
||||
|
||||
@ -142,7 +132,7 @@ static int _show_error(const wchar_t *preamble, const wchar_t *msg, const int co
|
||||
else {
|
||||
cbuf = (char*) calloc(10+(wcslen(buf)*4), sizeof(char));
|
||||
if (cbuf) {
|
||||
if (WideCharToMultiByte(CP_UTF8, 0, buf, -1, cbuf, 10+(wcslen(buf)*4), NULL, NULL) != 0) printf_s(cbuf);
|
||||
if (WideCharToMultiByte(CP_UTF8, 0, buf, -1, cbuf, (int)(10+(wcslen(buf)*4)), NULL, NULL) != 0) printf_s(cbuf);
|
||||
free(cbuf);
|
||||
}
|
||||
}
|
||||
@ -165,6 +155,7 @@ int show_last_error_crt(wchar_t *preamble) {
|
||||
int show_last_error(wchar_t *preamble) {
|
||||
wchar_t *msg = NULL;
|
||||
DWORD dw = GetLastError();
|
||||
int ret;
|
||||
|
||||
FormatMessage(
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
@ -173,10 +164,13 @@ int show_last_error(wchar_t *preamble) {
|
||||
NULL,
|
||||
dw,
|
||||
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
&msg,
|
||||
0, NULL );
|
||||
(LPWSTR)&msg,
|
||||
0,
|
||||
NULL );
|
||||
|
||||
return _show_error(preamble, msg, (int)dw);
|
||||
ret = _show_error(preamble, msg, (int)dw);
|
||||
if (msg != NULL) LocalFree(msg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
char* get_app_dir() {
|
||||
@ -254,10 +248,10 @@ void setup_stream(const char *name, const char *errors, UINT cp) {
|
||||
else if (cp == CP_UTF7) _snprintf_s(buf, 100, _TRUNCATE, "%s", "utf-7");
|
||||
else _snprintf_s(buf, 100, _TRUNCATE, "cp%d", cp);
|
||||
|
||||
stream = PySys_GetObject(name);
|
||||
stream = PySys_GetObject((char*)name);
|
||||
|
||||
if (!PyFile_SetEncodingAndErrors(stream, buf, errors))
|
||||
ExitProcess(calibre_show_python_error("Failed to set stream encoding", 1));
|
||||
if (!PyFile_SetEncodingAndErrors(stream, buf, (char*)errors))
|
||||
ExitProcess(calibre_show_python_error(L"Failed to set stream encoding", 1));
|
||||
|
||||
free(buf);
|
||||
|
||||
@ -372,7 +366,6 @@ void initialize_interpreter(wchar_t *outr, wchar_t *errr,
|
||||
}
|
||||
PySys_SetObject("argv", argv);
|
||||
|
||||
findproc = FindLibrary;
|
||||
Py_InitModule3("_memimporter", methods, module_doc);
|
||||
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
<Wix xmlns='http://schemas.microsoft.com/wix/2006/wi' xmlns:util="http://schemas.microsoft.com/wix/UtilExtension"
|
||||
>
|
||||
|
||||
<Product Name='{app}' Id='*' UpgradeCode='{upgrade_code}'
|
||||
<Product Name='{app}{x64}' Id='*' UpgradeCode='{upgrade_code}'
|
||||
Language='1033' Codepage='1252' Version='{version}' Manufacturer='Kovid Goyal'>
|
||||
|
||||
<Package Id='*' Keywords='Installer' Description="{app} Installer"
|
||||
@ -29,19 +29,24 @@
|
||||
Language="1033"
|
||||
Property="NEWPRODUCTFOUND"/>
|
||||
</Upgrade>
|
||||
<CustomAction Id="PreventDowngrading" Error="Newer version already installed."/>
|
||||
<CustomAction Id="PreventDowngrading" Error="Newer version of {app} already installed. If you want to downgrade you must uninstall {app} first."/>
|
||||
|
||||
<Property Id="APPLICATIONFOLDER">
|
||||
<RegistrySearch Id='calibreInstDir' Type='raw'
|
||||
Root='HKLM' Key="Software\{app}\Installer" Name="InstallPath" />
|
||||
Root='HKLM' Key="Software\{app}{x64}\Installer" Name="InstallPath" />
|
||||
</Property>
|
||||
|
||||
<Directory Id='TARGETDIR' Name='SourceDir'>
|
||||
<Directory Id='ProgramFilesFolder' Name='PFiles'>
|
||||
<Directory Id='APPLICATIONFOLDER' Name='{app}' />
|
||||
<Directory Id='{ProgramFilesFolder}' Name='PFiles'>
|
||||
<!-- The name must be calibre on 32 bit to ensure
|
||||
that the component guids don't change compared
|
||||
to previous msis. However, on 64 bit it must
|
||||
be Calibre2 otherwise by default it will
|
||||
install to C:\Program Files\calibre -->
|
||||
<Directory Id='APPLICATIONFOLDER' Name="{appfolder}" />
|
||||
</Directory>
|
||||
<Directory Id="ProgramMenuFolder">
|
||||
<Directory Id="ApplicationProgramsFolder" Name="{app} - E-book Management"/>
|
||||
<Directory Id="ApplicationProgramsFolder" Name="{app}{x64} - E-book Management"/>
|
||||
</Directory>
|
||||
<Directory Id="DesktopFolder" Name="Desktop"/>
|
||||
</Directory>
|
||||
@ -50,24 +55,24 @@
|
||||
{app_components}
|
||||
<Component Id="AddToPath" Guid="*">
|
||||
<Environment Id='UpdatePath' Name='PATH' Action='set' System='yes' Part='last' Value='[APPLICATIONFOLDER]' />
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="system_path_updated" Type="integer" Value="1" KeyPath="yes"/>
|
||||
</Component>
|
||||
<Component Id="RememberInstallDir" Guid="*">
|
||||
<RegistryValue Root="HKLM" Key="Software\{app}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
|
||||
<RegistryValue Root="HKLM" Key="Software\{app}{x64}\Installer" Name="InstallPath" Type="string" Value="[APPLICATIONFOLDER]" KeyPath="yes"/>
|
||||
</Component>
|
||||
</DirectoryRef>
|
||||
|
||||
<DirectoryRef Id="ApplicationProgramsFolder">
|
||||
<Component Id="StartMenuShortcuts" Guid="*">
|
||||
<Shortcut Id="s1" Name="{app} - E-book management"
|
||||
<Shortcut Id="s1" Name="{app}{x64} - E-book management"
|
||||
Description="Manage your e-book collection and download news"
|
||||
Target="[#{exe_map[calibre]}]"
|
||||
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
|
||||
<Shortcut Id="s2" Name="E-book viewer"
|
||||
<Shortcut Id="s2" Name="E-book viewer{x64}"
|
||||
Description="Viewer for all the major e-book formats"
|
||||
Target="[#{exe_map[ebook-viewer]}]"
|
||||
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
|
||||
<Shortcut Id="s3" Name="LRF viewer"
|
||||
<Shortcut Id="s3" Name="LRF viewer{x64}"
|
||||
Description="Viewer for LRF format e-books"
|
||||
Target="[#{exe_map[lrfviewer]}]"
|
||||
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
|
||||
@ -79,17 +84,17 @@
|
||||
Target="http://calibre-ebook.com/get-involved"/>
|
||||
|
||||
<RemoveFolder Id="ApplicationProgramsFolder" On="uninstall"/>
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="start_menu_shortcuts_installed" Type="integer" Value="1" KeyPath="yes"/>
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="start_menu_shortcuts_installed" Type="integer" Value="1" KeyPath="yes"/>
|
||||
</Component>
|
||||
</DirectoryRef>
|
||||
|
||||
<DirectoryRef Id="DesktopFolder">
|
||||
<Component Id="DesktopShortcut" Guid="*">
|
||||
<Shortcut Id="ds1" Name="{app} - E-book management"
|
||||
<Shortcut Id="ds1" Name="{app}{x64} - E-book management"
|
||||
Description="Manage your e-book collection and download news"
|
||||
Target="[#{exe_map[calibre]}]"
|
||||
WorkingDirectory="APPLICATIONROOTDIRECTORY" />
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}" Name="desktop_shortcut_installed" Type="integer" Value="1" KeyPath="yes"/>
|
||||
<RegistryValue Root="HKCU" Key="Software\Microsoft\{app}{x64}" Name="desktop_shortcut_installed" Type="integer" Value="1" KeyPath="yes"/>
|
||||
</Component>
|
||||
</DirectoryRef>
|
||||
|
||||
@ -122,17 +127,35 @@
|
||||
<!-- Add icon to entry in Add/Remove programs -->
|
||||
<Icon Id="main_icon" SourceFile="{main_icon}"/>
|
||||
<Property Id="ARPPRODUCTICON" Value="main_icon" />
|
||||
<Property Id="ARPURLINFOABOUT" Value="http://calibre-ebook.com" />
|
||||
<Property Id='ARPHELPLINK' Value="http://calibre-ebook.com/help" />
|
||||
<Property Id='ARPURLUPDATEINFO' Value="http://calibre-ebook.com/download_windows" />
|
||||
<SetProperty Id="ARPINSTALLLOCATION" Value="[APPLICATIONFOLDER]" After="CostFinalize" />
|
||||
|
||||
<Condition
|
||||
Message="This application is only supported on Windows XP SP3, or higher.">
|
||||
<![CDATA[Installed OR (VersionNT >= 501)]]>
|
||||
Message="This application is only supported on {minverhuman}, or higher.">
|
||||
<![CDATA[Installed OR (VersionNT >= {minver})]]>
|
||||
</Condition>
|
||||
<!-- On 64 bit installers there is a bug in WiX that causes the
|
||||
WixSetDefaultPerMachineFolder action to incorrectly set
|
||||
APPLICATIONFOLDER to the x86 value, so we override it. See
|
||||
http://stackoverflow.com/questions/5479790/wix-how-to-override-c-program-files-x86-on-x64-machine-in-wixui-advanced-s
|
||||
-->
|
||||
<CustomAction
|
||||
Id="OverwriteWixSetDefaultPerMachineFolder"
|
||||
Property="WixPerMachineFolder"
|
||||
Value="[APPLICATIONFOLDER]"
|
||||
Execute="immediate"
|
||||
/>
|
||||
|
||||
<InstallExecuteSequence>
|
||||
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
|
||||
{fix_wix}
|
||||
<RemoveExistingProducts After="InstallFinalize" />
|
||||
</InstallExecuteSequence>
|
||||
<InstallUISequence>
|
||||
<Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
|
||||
{fix_wix}
|
||||
</InstallUISequence>
|
||||
|
||||
<UI>
|
||||
|
@ -6,11 +6,20 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, shutil, subprocess
|
||||
import os, shutil, subprocess, sys
|
||||
|
||||
from setup import __appname__, __version__, basenames
|
||||
from setup.build_environment import is64bit
|
||||
|
||||
if is64bit:
|
||||
WIXP = r'C:\Program Files (x86)\WiX Toolset v3.6'
|
||||
UPGRADE_CODE = '5DD881FF-756B-4097-9D82-8C0F11D521EA'
|
||||
MINVERHUMAN = 'Windows Vista'
|
||||
else:
|
||||
WIXP = r'C:\Program Files\WiX Toolset v3.6'
|
||||
UPGRADE_CODE = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1'
|
||||
MINVERHUMAN = 'Windows XP SP3'
|
||||
|
||||
WIXP = r'C:\Program Files\Windows Installer XML v3.5'
|
||||
CANDLE = WIXP+r'\bin\candle.exe'
|
||||
LIGHT = WIXP+r'\bin\light.exe'
|
||||
|
||||
@ -28,8 +37,14 @@ class WixMixIn:
|
||||
components = self.get_components_from_files()
|
||||
wxs = template.format(
|
||||
app = __appname__,
|
||||
appfolder = 'Calibre2' if is64bit else __appname__,
|
||||
version = __version__,
|
||||
upgrade_code = 'BEB2A80D-E902-4DAD-ADF9-8BD2DA42CFE1',
|
||||
upgrade_code = UPGRADE_CODE,
|
||||
ProgramFilesFolder = 'ProgramFiles64Folder' if is64bit else 'ProgramFilesFolder',
|
||||
x64 = ' 64bit' if is64bit else '',
|
||||
minverhuman = MINVERHUMAN,
|
||||
minver = '600' if is64bit else '501',
|
||||
fix_wix = '<Custom Action="OverwriteWixSetDefaultPerMachineFolder" After="WixSetDefaultPerMachineFolder" />' if is64bit else '',
|
||||
compression = self.opts.msi_compression,
|
||||
app_components = components,
|
||||
exe_map = self.smap,
|
||||
@ -48,14 +63,15 @@ class WixMixIn:
|
||||
with open(enusf, 'wb') as f:
|
||||
f.write(enus)
|
||||
wixobj = self.j(self.installer_dir, __appname__+'.wixobj')
|
||||
cmd = [CANDLE, '-nologo', '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf]
|
||||
arch = 'x64' if is64bit else 'x86'
|
||||
cmd = [CANDLE, '-nologo', '-arch', arch, '-ext', 'WiXUtilExtension', '-o', wixobj, wxsf]
|
||||
self.info(*cmd)
|
||||
subprocess.check_call(cmd)
|
||||
self.run_wix(cmd)
|
||||
self.installer = self.j(self.src_root, 'dist')
|
||||
if not os.path.exists(self.installer):
|
||||
os.makedirs(self.installer)
|
||||
self.installer = self.j(self.installer, '%s-%s.msi' % (__appname__,
|
||||
__version__))
|
||||
self.installer = self.j(self.installer, '%s%s-%s.msi' % (__appname__,
|
||||
('-64bit' if is64bit else ''), __version__))
|
||||
license = self.j(self.src_root, 'LICENSE.rtf')
|
||||
banner = self.j(self.src_root, 'icons', 'wix-banner.bmp')
|
||||
dialog = self.j(self.src_root, 'icons', 'wix-dialog.bmp')
|
||||
@ -66,13 +82,27 @@ class WixMixIn:
|
||||
'-dWixUILicenseRtf='+license,
|
||||
'-dWixUIBannerBmp='+banner,
|
||||
'-dWixUIDialogBmp='+dialog]
|
||||
cmd.append('-sice:ICE60') # No language in dlls warning
|
||||
cmd.extend([
|
||||
'-sice:ICE60',# No language in dlls warning
|
||||
'-sice:ICE61',# Allow upgrading with same version number
|
||||
'-sice:ICE40', # Re-install mode overriden
|
||||
'-sice:ICE69', # Shortcut components are part of a different feature than the files they point to
|
||||
])
|
||||
if self.opts.no_ice:
|
||||
cmd.append('-sval')
|
||||
if self.opts.verbose:
|
||||
cmd.append('-v')
|
||||
self.info(*cmd)
|
||||
subprocess.check_call(cmd)
|
||||
self.run_wix(cmd)
|
||||
|
||||
def run_wix(self, cmd):
    '''
    Run an external command, relay its output and exit on failure.

    :param cmd: The command and its arguments, as a list that is handed
                unchanged to subprocess.Popen.

    Everything the command wrote to stdout and then to stderr is passed to
    self.info() after the command has finished. If the command returns a
    non-zero exit status, sys.exit(1) is called.
    '''
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
    # communicate() drains both pipes while waiting for the child. Calling
    # wait() before reading can deadlock when the child produces more
    # output than the OS pipe buffer can hold.
    stdout, stderr = p.communicate()
    self.info(stdout)
    self.info(stderr)
    if p.returncode != 0:
        sys.exit(1)
|
||||
|
||||
def get_components_from_files(self):
|
||||
|
||||
@ -103,7 +133,20 @@ class WixMixIn:
|
||||
(fid, f, x, checksum),
|
||||
'</Component>'
|
||||
]
|
||||
components.append(''.join(c))
|
||||
if x.endswith('.exe') and not x.startswith('pdf'):
|
||||
# Add the executable to app paths so that users can
|
||||
# launch it from the run dialog even if it is not on
|
||||
# the path. See http://msdn.microsoft.com/en-us/library/windows/desktop/ee872121(v=vs.85).aspx
|
||||
c[-1:-1] = [
|
||||
('<RegistryValue Root="HKLM" '
|
||||
r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
|
||||
r'Paths\%s" Value="[#file_%d]" Type="string" />'%(x, fid)),
|
||||
('<RegistryValue Root="HKLM" '
|
||||
r'Key="SOFTWARE\Microsoft\Windows\CurrentVersion\App '
|
||||
r'Paths\{0}" Name="Path" Value="[APPLICATIONFOLDER]" '
|
||||
'Type="string" />'.format(x)),
|
||||
]
|
||||
components.append('\n'.join(c))
|
||||
return components
|
||||
|
||||
components = process_dir(os.path.abspath(self.base))
|
||||
@ -114,4 +157,3 @@ class WixMixIn:
|
||||
return '\t\t\t\t'+'\n\t\t\t\t'.join(components)
|
||||
|
||||
|
||||
|
||||
|
14524
setup/iso_639/ms.po
14524
setup/iso_639/ms.po
File diff suppressed because it is too large
Load Diff
@ -29,6 +29,7 @@ STAGING_DIR = '/root/staging'
|
||||
def installers():
    '''
    Return the list of distributable file names for the current version.

    The order is: the source tar.xz, the dmg, msi and tar.bz2 names from
    installer_name() with default arguments, the tar.bz2 and msi names
    from installer_name(..., is64bit=True), and finally the portable
    installer exe.
    '''
    source_tarball = 'dist/%s-%s.tar.xz'%(__appname__, __version__)
    default_builds = [installer_name(ext) for ext in ('dmg', 'msi', 'tar.bz2')]
    builds_64bit = [installer_name(ext, is64bit=True)
            for ext in ('tar.bz2', 'msi')]
    portable = 'dist/%s-portable-installer-%s.exe'%(__appname__, __version__)
    return [source_tarball] + default_builds + builds_64bit + [portable]
|
||||
@ -40,7 +41,7 @@ def installer_description(fname):
|
||||
bits = '32' if 'i686' in fname else '64'
|
||||
return bits + 'bit Linux binary'
|
||||
if fname.endswith('.msi'):
|
||||
return 'Windows installer'
|
||||
return 'Windows %sinstaller'%('64bit ' if '64bit' in fname else '')
|
||||
if fname.endswith('.dmg'):
|
||||
return 'OS X dmg'
|
||||
if fname.endswith('.exe'):
|
||||
|
@ -28,7 +28,7 @@ isunix = isosx or islinux
|
||||
isportable = os.environ.get('CALIBRE_PORTABLE_BUILD', None) is not None
|
||||
ispy3 = sys.version_info.major > 2
|
||||
isxp = iswindows and sys.getwindowsversion().major < 6
|
||||
is64bit = sys.maxint > (1 << 32)
|
||||
is64bit = sys.maxsize > (1 << 32)
|
||||
isworker = os.environ.has_key('CALIBRE_WORKER') or os.environ.has_key('CALIBRE_SIMPLE_WORKER')
|
||||
if isworker:
|
||||
os.environ.pop('CALIBRE_FORCE_ANSI', None)
|
||||
|
@ -148,10 +148,10 @@ def print_basic_debug_info(out=None):
|
||||
out = functools.partial(prints, file=out)
|
||||
import platform
|
||||
from calibre.constants import (__appname__, get_version, isportable, isosx,
|
||||
isfrozen)
|
||||
isfrozen, is64bit)
|
||||
out(__appname__, get_version(), 'Portable' if isportable else '',
|
||||
'isfrozen:', isfrozen)
|
||||
out(platform.platform(), platform.system())
|
||||
'isfrozen:', isfrozen, 'is64bit:', is64bit)
|
||||
out(platform.platform(), platform.system(), platform.architecture())
|
||||
out(platform.system_alias(platform.system(), platform.release(),
|
||||
platform.version()))
|
||||
out('Python', platform.python_version())
|
||||
|
@ -182,7 +182,7 @@ def debug(ioreg_to_tmp=False, buf=None, plugins=None,
|
||||
out(ioreg)
|
||||
|
||||
if hasattr(buf, 'getvalue'):
|
||||
return buf.getvalue().decode('utf-8')
|
||||
return buf.getvalue().decode('utf-8', 'replace')
|
||||
finally:
|
||||
sys.stdout = oldo
|
||||
sys.stderr = olde
|
||||
|
@ -232,7 +232,7 @@ class ANDROID(USBMS):
|
||||
'THINKPAD_TABLET', 'SGH-T989', 'YP-G70', 'STORAGE_DEVICE',
|
||||
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
|
||||
'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
|
||||
'NOVO7', 'MB526', '_USB#WYK7MSF8KE']
|
||||
'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
@ -243,7 +243,7 @@ class ANDROID(USBMS):
|
||||
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
|
||||
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
|
||||
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
|
||||
'NOVO7', 'ADVANCED']
|
||||
'NOVO7', 'ADVANCED', 'TABLET_PC']
|
||||
|
||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||
|
||||
|
@ -2357,6 +2357,8 @@ class KOBOTOUCH(KOBO):
|
||||
update_query = 'UPDATE content SET Series=?, SeriesNumber==? where BookID is Null and ContentID = ?'
|
||||
if book.series is None:
|
||||
update_values = (None, None, book.contentID, )
|
||||
elif book.series_index is None: # This should never happen, but...
|
||||
update_values = (book.series, None, book.contentID, )
|
||||
else:
|
||||
update_values = (book.series, "%g"%book.series_index, book.contentID, )
|
||||
|
||||
|
@ -16,6 +16,7 @@ const calibre_device_entry_t calibre_mtp_device_table[] = {
|
||||
|
||||
// Nexus 10
|
||||
, { "Google", 0x18d1, "Nexus 10", 0x4ee2, DEVICE_FLAGS_ANDROID_BUGS}
|
||||
, { "Google", 0x18d1, "Nexus 10", 0x4ee1, DEVICE_FLAGS_ANDROID_BUGS}
|
||||
|
||||
, { NULL, 0xffff, NULL, 0xffff, DEVICE_FLAG_NONE }
|
||||
};
|
||||
|
@ -54,6 +54,8 @@ def synchronous(tlockname):
|
||||
|
||||
class ConnectionListener (Thread):
|
||||
|
||||
NOT_SERVICED_COUNT = 6
|
||||
|
||||
def __init__(self, driver):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
@ -78,8 +80,8 @@ class ConnectionListener (Thread):
|
||||
|
||||
if not self.driver.connection_queue.empty():
|
||||
queue_not_serviced_count += 1
|
||||
if queue_not_serviced_count >= 3:
|
||||
self.driver._debug('queue not serviced')
|
||||
if queue_not_serviced_count >= self.NOT_SERVICED_COUNT:
|
||||
self.driver._debug('queue not serviced', queue_not_serviced_count)
|
||||
try:
|
||||
sock = self.driver.connection_queue.get_nowait()
|
||||
s = self.driver._json_encode(
|
||||
@ -1281,10 +1283,10 @@ class SMART_DEVICE_APP(DeviceConfig, DevicePlugin):
|
||||
self._close_listen_socket()
|
||||
return message
|
||||
else:
|
||||
while i < 100: # try up to 100 random port numbers
|
||||
while i < 100: # try 9090 then up to 99 random port numbers
|
||||
i += 1
|
||||
port = self._attach_to_port(self.listen_socket,
|
||||
random.randint(8192, 32000))
|
||||
9090 if i == 1 else random.randint(8192, 32000))
|
||||
if port != 0:
|
||||
break
|
||||
if port == 0:
|
||||
|
@ -74,11 +74,12 @@ def remove_kindlegen_markup(parts):
|
||||
part = "".join(srcpieces)
|
||||
parts[i] = part
|
||||
|
||||
# we can safely remove all of the Kindlegen generated data-AmznPageBreak tags
|
||||
# we can safely remove all of the Kindlegen generated data-AmznPageBreak
|
||||
# attributes
|
||||
find_tag_with_AmznPageBreak_pattern = re.compile(
|
||||
r'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
|
||||
within_tag_AmznPageBreak_position_pattern = re.compile(
|
||||
r'''\sdata-AmznPageBreak=['"][^'"]*['"]''')
|
||||
r'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')
|
||||
|
||||
for i in xrange(len(parts)):
|
||||
part = parts[i]
|
||||
@ -86,10 +87,8 @@ def remove_kindlegen_markup(parts):
|
||||
for j in range(len(srcpieces)):
|
||||
tag = srcpieces[j]
|
||||
if tag.startswith('<'):
|
||||
for m in within_tag_AmznPageBreak_position_pattern.finditer(tag):
|
||||
replacement = ''
|
||||
tag = within_tag_AmznPageBreak_position_pattern.sub(replacement, tag, 1)
|
||||
srcpieces[j] = tag
|
||||
srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub(
|
||||
lambda m:' style="page-break-after:%s"'%m.group(1), tag)
|
||||
part = "".join(srcpieces)
|
||||
parts[i] = part
|
||||
|
||||
@ -203,7 +202,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
|
||||
# All flows are now unicode and have links resolved
|
||||
return flows
|
||||
|
||||
def insert_flows_into_markup(parts, flows, mobi8_reader):
|
||||
def insert_flows_into_markup(parts, flows, mobi8_reader, log):
|
||||
mr = mobi8_reader
|
||||
|
||||
# kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
|
||||
@ -219,7 +218,12 @@ def insert_flows_into_markup(parts, flows, mobi8_reader):
|
||||
if tag.startswith('<'):
|
||||
for m in flow_pattern.finditer(tag):
|
||||
num = int(m.group(1), 32)
|
||||
try:
|
||||
fi = mr.flowinfo[num]
|
||||
except IndexError:
|
||||
log.warn('Ignoring invalid flow reference: %s'%m.group())
|
||||
tag = ''
|
||||
else:
|
||||
if fi.format == 'inline':
|
||||
tag = flows[num]
|
||||
else:
|
||||
@ -313,7 +317,7 @@ def expand_mobi8_markup(mobi8_reader, resource_map, log):
|
||||
flows = update_flow_links(mobi8_reader, resource_map, log)
|
||||
|
||||
# Insert inline flows into the markup
|
||||
insert_flows_into_markup(parts, flows, mobi8_reader)
|
||||
insert_flows_into_markup(parts, flows, mobi8_reader, log)
|
||||
|
||||
# Insert raster images into markup
|
||||
insert_images_into_markup(parts, resource_map, log)
|
||||
|
@ -44,6 +44,18 @@ def locate_beg_end_of_tag(ml, aid):
|
||||
return plt, pgt
|
||||
return 0, 0
|
||||
|
||||
def reverse_tag_iter(block):
    '''
    Yield every ``<...>`` span in block, from the last one back to the
    first.

    The search works backwards: find the last ``>`` before the current
    limit, then the last ``<`` before that ``>``. Iteration ends when
    either search finds nothing.
    '''
    limit = len(block)
    close_pos = block.rfind(b'>', 0, limit)
    while close_pos != -1:
        open_pos = block.rfind(b'<', 0, close_pos)
        if open_pos == -1:
            return
        yield block[open_pos:close_pos + 1]
        # Continue strictly before the tag that was just yielded
        limit = open_pos
        close_pos = block.rfind(b'>', 0, limit)
|
||||
|
||||
class Mobi8Reader(object):
|
||||
|
||||
def __init__(self, mobi6_reader, log):
|
||||
@ -275,13 +287,12 @@ class Mobi8Reader(object):
|
||||
return '%s/%s'%(fi.type, fi.filename), idtext
|
||||
|
||||
def get_id_tag(self, pos):
|
||||
# find the correct tag by actually searching in the destination
|
||||
# textblock at position
|
||||
# Find the first tag with a named anchor (name or id attribute) before
|
||||
# pos
|
||||
fi = self.get_file_info(pos)
|
||||
if fi.num is None and fi.start is None:
|
||||
raise ValueError('No file contains pos: %d'%pos)
|
||||
textblock = self.parts[fi.num]
|
||||
id_map = []
|
||||
npos = pos - fi.start
|
||||
pgt = textblock.find(b'>', npos)
|
||||
plt = textblock.find(b'<', npos)
|
||||
@ -290,28 +301,15 @@ class Mobi8Reader(object):
|
||||
if plt == npos or pgt < plt:
|
||||
npos = pgt + 1
|
||||
textblock = textblock[0:npos]
|
||||
# find id links only inside of tags
|
||||
# inside any < > pair find all "id=' and return whatever is inside
|
||||
# the quotes
|
||||
id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"][^>]*>''',
|
||||
re.IGNORECASE)
|
||||
for m in re.finditer(id_pattern, textblock):
|
||||
id_map.append((m.start(), m.group(1)))
|
||||
id_re = re.compile(br'''<[^>]+\sid\s*=\s*['"]([^'"]+)['"]''')
|
||||
name_re = re.compile(br'''<\s*a\s*\sname\s*=\s*['"]([^'"]+)['"]''')
|
||||
for tag in reverse_tag_iter(textblock):
|
||||
m = id_re.match(tag) or name_re.match(tag)
|
||||
if m is not None:
|
||||
return m.group(1)
|
||||
|
||||
if not id_map:
|
||||
# Found no id in the textblock, link must be to top of file
|
||||
# No tag found, link to start of file
|
||||
return b''
|
||||
# if npos is before first id= inside a tag, return the first
|
||||
if npos < id_map[0][0]:
|
||||
return id_map[0][1]
|
||||
# if npos is after the last id= inside a tag, return the last
|
||||
if npos > id_map[-1][0]:
|
||||
return id_map[-1][1]
|
||||
# otherwise find last id before npos
|
||||
for i, item in enumerate(id_map):
|
||||
if npos < item[0]:
|
||||
return id_map[i-1][1]
|
||||
return id_map[0][1]
|
||||
|
||||
def create_guide(self):
|
||||
guide = Guide()
|
||||
|
@ -320,13 +320,11 @@ class OEBReader(object):
|
||||
self.logger.warn(u'Spine item %r not found' % idref)
|
||||
continue
|
||||
item = manifest.ids[idref]
|
||||
if item.media_type.lower() in OEB_DOCS and hasattr(item.data, 'xpath'):
|
||||
spine.add(item, elem.get('linear'))
|
||||
for item in spine:
|
||||
if item.media_type.lower() not in OEB_DOCS:
|
||||
if not hasattr(item.data, 'xpath'):
|
||||
else:
|
||||
self.oeb.log.warn('The item %s is not a XML document.'
|
||||
' Removing it from spine.'%item.href)
|
||||
spine.remove(item)
|
||||
if len(spine) == 0:
|
||||
raise OEBError("Spine is empty")
|
||||
self._spine_add_extra()
|
||||
|
@ -114,7 +114,9 @@ class DetectStructure(object):
|
||||
|
||||
def find_matches(expr, doc):
|
||||
try:
|
||||
return XPath(expr)(doc)
|
||||
ans = XPath(expr)(doc)
|
||||
len(ans)
|
||||
return ans
|
||||
except:
|
||||
self.log.warn('Invalid chapter expression, ignoring: %s'%expr)
|
||||
return []
|
||||
@ -203,7 +205,9 @@ class DetectStructure(object):
|
||||
|
||||
def find_matches(expr, doc):
|
||||
try:
|
||||
return XPath(expr)(doc)
|
||||
ans = XPath(expr)(doc)
|
||||
len(ans)
|
||||
return ans
|
||||
except:
|
||||
self.log.warn('Invalid ToC expression, ignoring: %s'%expr)
|
||||
return []
|
||||
|
@ -27,10 +27,10 @@ def get_custom_size(opts):
|
||||
custom_size = None
|
||||
if opts.custom_size != None:
|
||||
width, sep, height = opts.custom_size.partition('x')
|
||||
if height != '':
|
||||
if height:
|
||||
try:
|
||||
width = int(width)
|
||||
height = int(height)
|
||||
width = float(width)
|
||||
height = float(height)
|
||||
custom_size = (width, height)
|
||||
except:
|
||||
custom_size = None
|
||||
|
@ -72,8 +72,8 @@ class LibreDEStore(BasicStoreConfig, StorePlugin):
|
||||
mobi = details.xpath(
|
||||
'boolean(.//span[@class="bindername" and contains(text(), "mobipocket")]/text())')
|
||||
|
||||
cover_url = ''.join(data.xpath('.//div[@class="coverImg"]/a/img/@src'))
|
||||
price = ''.join(data.xpath('.//span[@class="preis"]/text()')).replace('*', '').strip()
|
||||
cover_url = ''.join(data.xpath('.//div[@class="coverimg"]/a/img/@src'))
|
||||
price = ''.join(data.xpath('.//div[@class="preis"]/text()')).replace('*', '').strip()
|
||||
|
||||
counter -= 1
|
||||
|
||||
|
@ -8,7 +8,7 @@ from PyQt4.Qt import (QThread, pyqtSignal, Qt, QUrl, QDialog, QGridLayout,
|
||||
import mechanize
|
||||
|
||||
from calibre.constants import (__appname__, __version__, iswindows, isosx,
|
||||
isportable)
|
||||
isportable, is64bit)
|
||||
from calibre import browser, prints, as_unicode
|
||||
from calibre.utils.config import prefs
|
||||
from calibre.gui2 import config, dynamic, open_url
|
||||
@ -19,6 +19,13 @@ URL = 'http://status.calibre-ebook.com/latest'
|
||||
NO_CALIBRE_UPDATE = '-0.0.0'
|
||||
VSEP = '|'
|
||||
|
||||
def get_download_url():
|
||||
which = ('portable' if isportable else 'windows' if iswindows
|
||||
else 'osx' if isosx else 'linux')
|
||||
if which == 'windows' and is64bit:
|
||||
which += '64'
|
||||
return 'http://calibre-ebook.com/download_' + which
|
||||
|
||||
def get_newest_version():
|
||||
br = browser()
|
||||
req = mechanize.Request(URL)
|
||||
@ -116,10 +123,7 @@ class UpdateNotification(QDialog):
|
||||
config.set('new_version_notification', bool(self.cb.isChecked()))
|
||||
|
||||
def accept(self):
|
||||
url = ('http://calibre-ebook.com/download_' +
|
||||
('portable' if isportable else 'windows' if iswindows
|
||||
else 'osx' if isosx else 'linux'))
|
||||
open_url(QUrl(url))
|
||||
open_url(QUrl(get_download_url()))
|
||||
|
||||
QDialog.accept(self)
|
||||
|
||||
|
@ -22,6 +22,7 @@ from calibre.library.comments import comments_to_html
|
||||
from calibre.library.server import custom_fields_to_display
|
||||
from calibre.library.field_metadata import category_icon_map
|
||||
from calibre.library.server.utils import quote, unquote
|
||||
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
|
||||
|
||||
def xml(*args, **kwargs):
|
||||
ans = prepare_string_for_xml(*args, **kwargs)
|
||||
@ -823,6 +824,16 @@ class BrowseServer(object):
|
||||
if field in ('title', 'formats') or not args.get(field, False) \
|
||||
or not m['name']:
|
||||
continue
|
||||
if field == 'identifiers':
|
||||
urls = urls_from_identifiers(mi.get(field, {}))
|
||||
links = [u'<a class="details_category_link" target="_new" href="%s" title="%s:%s">%s</a>' % (url, id_typ, id_val, name)
|
||||
for name, id_typ, id_val, url in urls]
|
||||
links = u', '.join(links)
|
||||
if links:
|
||||
fields.append((m['name'], u'<strong>%s: </strong>%s'%(
|
||||
_('Ids'), links)))
|
||||
continue
|
||||
|
||||
if m['datatype'] == 'rating':
|
||||
r = u'<strong>%s: </strong>'%xml(m['name']) + \
|
||||
render_rating(mi.get(field)/2.0, self.opts.url_prefix,
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user